From fdfd150ba01134e9544c7aaa3b6ce5060d8b6e61 Mon Sep 17 00:00:00 2001 From: Rory Mitchell Date: Mon, 24 Mar 2025 03:55:26 -0700 Subject: [PATCH 01/21] Implement onnx serialisation --- legateboost/models/base_model.py | 10 +++ legateboost/models/krr.py | 109 +++++++++++++++++++++++++++++++ legateboost/models/linear.py | 39 ++++++++++- legateboost/test/test_onnx.py | 35 ++++++++++ 4 files changed, 190 insertions(+), 3 deletions(-) create mode 100644 legateboost/test/test_onnx.py diff --git a/legateboost/models/base_model.py b/legateboost/models/base_model.py index 2a5df986..a1e88011 100644 --- a/legateboost/models/base_model.py +++ b/legateboost/models/base_model.py @@ -126,3 +126,13 @@ def __mul__(self, scalar: Any) -> "BaseModel": def __hash__(self) -> int: return hash(str(self)) + + def to_onnx(self) -> Any: + """Convert the model to an ONNX model. + + Returns + ------- + Any + The ONNX model. + """ + raise NotImplementedError diff --git a/legateboost/models/krr.py b/legateboost/models/krr.py index 16d7f445..6e6bd0ca 100644 --- a/legateboost/models/krr.py +++ b/legateboost/models/krr.py @@ -242,3 +242,112 @@ def __mul__(self, scalar: Any) -> "KRR": new = copy.deepcopy(self) self.betas_ *= scalar return new + + def to_onnx(self) -> Any: + from onnx import numpy_helper + from onnx.checker import check_model + from onnx.helper import ( + make_graph, + make_model, + make_node, + make_tensor_value_info, + np_dtype_to_tensor_dtype, + ) + + assert self.X_train.dtype == self.betas_.dtype + + def make_constant_node(value, name): + return make_node( + "Constant", + inputs=[], + value=numpy_helper.from_array(value, name=name), + outputs=[name], + ) + + nodes = [] + + # model constants + betas = numpy_helper.from_array(self.betas_.__array__(), name="betas") + X_train = numpy_helper.from_array(self.X_train.__array__(), name="X_train") + + # pred inputs + X = make_tensor_value_info( + "X", + np_dtype_to_tensor_dtype(self.betas_.dtype), + [None, self.X_train.shape[1]], + ) + 
pred = make_tensor_value_info( + "pred", + np_dtype_to_tensor_dtype(self.betas_.dtype), + [None, self.betas_.shape[1]], + ) + + # exanded l2 distance + # distance = np.sum(X**2, axis=1)[:, np.newaxis] - 2 * np.dot(X, self.X_train.T) + # + np.sum(self.X_train**2, axis=1) + make_tensor_value_info( + "XX", np_dtype_to_tensor_dtype(self.betas_.dtype), [None] + ) + make_tensor_value_info( + "YY", + np_dtype_to_tensor_dtype(self.betas_.dtype), + [self.X_train.shape[0], 1], + ) + make_tensor_value_info( + "XY_reshaped", + np_dtype_to_tensor_dtype(self.betas_.dtype), + [1, self.X_train.shape[0]], + ) + make_tensor_value_info( + "XY", + np_dtype_to_tensor_dtype(self.betas_.dtype), + [None, self.X_train.shape[0]], + ) + nodes.append(make_constant_node(np.array([1]), "axis1")) + nodes.append(make_node("ReduceSumSquare", ["X", "axis1"], ["XX"])) + nodes.append(make_node("Gemm", ["X", "X_train"], ["XY"], alpha=-2.0, transB=1)) + nodes.append(make_node("ReduceSumSquare", ["X_train", "axis1"], ["YY"])) + nodes.append(make_constant_node(np.array([1, -1]), "reshape")) + nodes.append(make_node("Reshape", ["YY", "reshape"], ["YY_reshaped"])) + nodes.append(make_node("Add", ["XX", "XY"], ["add0"])) + make_tensor_value_info( + "l2", + np_dtype_to_tensor_dtype(self.betas_.dtype), + [None, self.X_train.shape[0]], + ) + nodes.append(make_node("Add", ["YY_reshaped", "add0"], ["l2"])) + nodes.append(make_constant_node(np.array([0.0], self.betas_.dtype), "zero")) + make_tensor_value_info( + "l2_clipped", + np_dtype_to_tensor_dtype(self.betas_.dtype), + [None, self.X_train.shape[0]], + ) + nodes.append(make_node("Max", ["l2", "zero"], ["l2_clipped"])) + + # RBF kernel + # K = np.exp(-distance / (2 * self.sigma**2)) + make_tensor_value_info( + "rbf0", + np_dtype_to_tensor_dtype(self.betas_.dtype), + [None, self.X_train.shape[0]], + ) + nodes.append( + make_constant_node( + np.array([-2.0 * self.sigma**2], self.betas_.dtype), "denominator" + ) + ) + nodes.append(make_node("Div", ["l2_clipped", 
"denominator"], ["rbf0"])) + make_tensor_value_info( + "K", + np_dtype_to_tensor_dtype(self.betas_.dtype), + [None, self.X_train.shape[0]], + ) + nodes.append(make_node("Exp", ["rbf0"], ["K"])) + + # prediction + # pred = np.dot(K, self.betas_) + nodes.append(make_node("MatMul", ["K", "betas"], ["pred"])) + graph = make_graph(nodes, "krr", [X], [pred], [betas, X_train]) + onnx_model = make_model(graph) + check_model(onnx_model) + return onnx_model diff --git a/legateboost/models/linear.py b/legateboost/models/linear.py index aad65c83..98a0b776 100644 --- a/legateboost/models/linear.py +++ b/legateboost/models/linear.py @@ -58,7 +58,7 @@ def __init__( self.l2_regularization = alpha def _fit_solve(self, X: cn.ndarray, g: cn.ndarray, h: cn.ndarray) -> None: - self.betas_ = cn.zeros((X.shape[1] + 1, g.shape[1])) + self.betas_ = cn.zeros((X.shape[1] + 1, g.shape[1]), dtype=X.dtype) num_outputs = g.shape[1] for k in range(num_outputs): W = cn.sqrt(h[:, k]) @@ -135,12 +135,13 @@ def batch_predict(models: Sequence[BaseModel], X: cn.ndarray) -> cn.ndarray: # summing together the coeffiecients of each model then predicting # saves a lot of work betas = cn.sum([model.betas_ for model in models], axis=0) - return betas[0] + X.dot(betas[1:].astype(X.dtype)) + betas = betas.astype(X.dtype) + return betas[0] + X.dot(betas[1:]) def __str__(self) -> str: return ( "Bias: " - + str(self.betas_[1]) + + str(self.betas_[0]) + "\nCoefficients: " + str(self.betas_[1:]) + "\n" @@ -150,3 +151,35 @@ def __mul__(self, scalar: Any) -> "Linear": new = copy.deepcopy(self) new.betas_ *= scalar return new + + def to_onnx(self) -> Any: + from onnx import numpy_helper + from onnx.checker import check_model + from onnx.helper import ( + make_graph, + make_model, + make_node, + make_tensor_value_info, + np_dtype_to_tensor_dtype, + ) + + # model constants + betas = numpy_helper.from_array(self.betas_[1:].__array__(), name="betas") + intercept = numpy_helper.from_array( + self.betas_[0].__array__(), 
name="intercept" + ) + + # pred inputs + X = make_tensor_value_info( + "X", np_dtype_to_tensor_dtype(self.betas_.dtype), [None, None] + ) + pred = make_tensor_value_info( + "pred", np_dtype_to_tensor_dtype(self.betas_.dtype), [None] + ) + + node1 = make_node("MatMul", ["X", "betas"], ["XBeta"]) + node2 = make_node("Add", ["XBeta", "intercept"], ["pred"]) + graph = make_graph([node1, node2], "lr", [X], [pred], [betas, intercept]) + onnx_model = make_model(graph) + check_model(onnx_model) + return onnx_model diff --git a/legateboost/test/test_onnx.py b/legateboost/test/test_onnx.py new file mode 100644 index 00000000..4aae2223 --- /dev/null +++ b/legateboost/test/test_onnx.py @@ -0,0 +1,35 @@ +import numpy as np +import pytest +from onnx.reference import ReferenceEvaluator + +import cupynumeric as cn +import legateboost as lb + + +@pytest.mark.parametrize( + "Model", [M for M in lb.models.BaseModel.__subclasses__() if hasattr(M, "to_onnx")] +) +@pytest.mark.parametrize("n_outputs", [1, 5]) +@pytest.mark.parametrize("dtype", [np.float32, np.float64]) +def test_onnx(Model, n_outputs, dtype): + rs = np.random.RandomState(0) + X = rs.random((1000, 10)).astype(dtype) + g = rs.normal(size=(X.shape[0], n_outputs)) + h = rs.random(g.shape) + 0.1 + model = ( + Model() + .set_random_state(np.random.RandomState(2)) + .fit(cn.array(X), cn.array(g), cn.array(h)) + ) + + def pred_onnx(onnx, X): + sess = ReferenceEvaluator(onnx) + pred = np.empty(X.shape[0], dtype=dtype) + feeds = {"X": X, "pred": pred} + return sess.run(None, feeds) + + assert np.allclose( + model.predict(X), + pred_onnx(model.to_onnx(), X)[0], + atol=1e-3 if dtype == np.float32 else 1e-6, + ) From be665582283802956d518db7a34f305f7c911cf3 Mon Sep 17 00:00:00 2001 From: Rory Mitchell Date: Wed, 26 Mar 2025 03:00:32 -0700 Subject: [PATCH 02/21] Implement tree models --- legateboost/models/tree.py | 143 +++++++++++++++++++++++++++++++--- legateboost/test/test_onnx.py | 2 +- 2 files changed, 134 insertions(+), 11 
deletions(-) diff --git a/legateboost/models/tree.py b/legateboost/models/tree.py index 5dbde0fa..9a13a163 100644 --- a/legateboost/models/tree.py +++ b/legateboost/models/tree.py @@ -3,6 +3,8 @@ from enum import IntEnum from typing import Any, Callable, List, Sequence, Union, cast +import numpy as np + import cupynumeric as cn from legate.core import TaskTarget, get_legate_runtime, types @@ -90,6 +92,15 @@ def __init__( def num_nodes(self) -> int: return int(cn.sum(self.hessian > 0.0)) + def is_leaf(self, id: int) -> Any: + return self.feature[id] == -1 + + def left_child(self, id) -> int: + return id * 2 + 1 + + def right_child(self, id) -> int: + return id * 2 + 2 + def fit( self, X: cn.ndarray, @@ -108,7 +119,7 @@ def fit( h_ = get_store(h).promote(1, X.shape[1]) task.add_scalar_arg(self.max_depth, types.int32) - max_nodes = 2 ** (self.max_depth + 1) + max_nodes = 2 ** (self.max_depth + 1) - 1 task.add_scalar_arg(max_nodes, types.int32) task.add_scalar_arg(self.split_samples, types.int32) task.add_scalar_arg(self.random_state.randint(0, 2**31), types.int32) @@ -268,15 +279,6 @@ def batch_predict(models: Sequence[BaseModel], X: cn.ndarray) -> cn.ndarray: return cn.array(pred, copy=False) - def is_leaf(self, id: int) -> Any: - return self.feature[id] == -1 - - def left_child(self, id: int) -> int: - return id * 2 + 1 - - def right_child(self, id: int) -> int: - return id * 2 + 2 - def __str__(self) -> str: def format_vector(v: cn.ndarray) -> str: if cn.isscalar(v): @@ -313,3 +315,124 @@ def __mul__(self, scalar: Any) -> "Tree": new = copy.deepcopy(self) new.leaf_value *= scalar return new + + def to_onnx(self) -> Any: + import onnx + from onnx import numpy_helper + from onnx.checker import check_model + from onnx.helper import ( + make_graph, + make_model, + make_tensor, + make_tensor_value_info, + ) + + onnx_nodes = [] + + # We map the legate-boost tree representation to the TreeEnsemble ONNX operator + # the features array, splits array, and leaf weights can be 
passed unchanged + # ONNX then requires some extra arrays to represent the tree structure + # - nodes_truenodeidx is the index of the left child for a given node + # - nodes_falsenodeidx is the index of the right child for a given node + # - nodes_modes indicates that nodes use a <= comparison operator + # - nodes_trueleafs indicates that the left child is a leaf node + # - nodes_falseleafs indicates that the right child is a leaf node + # - leaf_targetids indicates which output the leaf node corresponds to + # ONNX does not support vector leaf so we will repeat the tree n_outputs + # times, each time with a different constant for leaf_targetids + # This is not ideal but I don't see a better way + + tree_max_nodes = self.feature.size + all_nodes_idx = np.arange(tree_max_nodes) + nodes_featureids = self.feature.__array__() + nodes_splits = numpy_helper.from_array(self.split_value.__array__()) + nodes_truenodeids = self.left_child(all_nodes_idx) + # get the left child of each node and check if it is a leaf + # if the node is already leaf then its child can go off the end of the array + # use np.minimum to avoid this + nodes_trueleafs = self.is_leaf( + np.minimum(tree_max_nodes - 1, self.left_child(all_nodes_idx)) + ).astype(int) + nodes_falsenodeids = self.right_child(all_nodes_idx) + nodes_falseleafs = self.is_leaf( + np.minimum(tree_max_nodes - 1, self.right_child(all_nodes_idx)) + ).astype(int) + + for output_idx in range(0, self.leaf_value.shape[1]): + leaf_targetids = np.full(self.feature.size, output_idx, dtype=np.int64) + leaf_weights = numpy_helper.from_array( + self.leaf_value[:, output_idx].__array__() + ) + + onnx_nodes.append( + onnx.helper.make_node( + "TreeEnsemble", + ["X"], + ["pred" + str(output_idx)], + domain="ai.onnx.ml", + n_targets=self.leaf_value.shape[1], + membership_values=None, + nodes_missing_value_tracks_true=None, + nodes_hitrates=None, + aggregate_function=1, + post_transform=0, + tree_roots=[0], + nodes_modes=make_tensor( + 
"nodes_modes", + onnx.TensorProto.UINT8, + self.feature.shape, + np.zeros_like(self.feature, dtype=np.uint8), + ), + nodes_featureids=nodes_featureids, + nodes_splits=nodes_splits, + nodes_truenodeids=nodes_truenodeids, + nodes_trueleafs=nodes_trueleafs, + nodes_falsenodeids=nodes_falsenodeids, + nodes_falseleafs=nodes_falseleafs, + leaf_targetids=leaf_targetids, + leaf_weights=leaf_weights, + ) + ) + + if output_idx == 0: + accumulated_pred = make_tensor_value_info( + "accumulated_pred0", onnx.TensorProto.DOUBLE, [None, None] + ) + onnx_nodes.append( + onnx.helper.make_node( + "Identity", + ["pred" + str(output_idx)], + ["accumulated_pred0"], + ) + ) + else: + accumulated_pred = make_tensor_value_info( + "accumulated_pred" + str(output_idx), + onnx.TensorProto.DOUBLE, + [None, None], + ) + onnx_nodes.append( + onnx.helper.make_node( + "Add", + [ + "accumulated_pred" + str(output_idx - 1), + "pred" + str(output_idx), + ], + ["accumulated_pred" + str(output_idx)], + ) + ) + + # pred inputs + X = make_tensor_value_info("X", onnx.TensorProto.DOUBLE, [None, None]) + graph = make_graph( + onnx_nodes, "legateboost.models.Tree", [X], [accumulated_pred] + ) + model = make_model( + graph, + opset_imports=[ + onnx.helper.make_opsetid("ai.onnx.ml", 5), + onnx.helper.make_opsetid("", 14), + ], + ) + check_model(model) + return model diff --git a/legateboost/test/test_onnx.py b/legateboost/test/test_onnx.py index 4aae2223..8710eaf4 100644 --- a/legateboost/test/test_onnx.py +++ b/legateboost/test/test_onnx.py @@ -29,7 +29,7 @@ def pred_onnx(onnx, X): return sess.run(None, feeds) assert np.allclose( - model.predict(X), + model.predict(cn.array(X)), pred_onnx(model.to_onnx(), X)[0], atol=1e-3 if dtype == np.float32 else 1e-6, ) From 8e20945498ad8854b74cb0a4b12794de8e351f18 Mon Sep 17 00:00:00 2001 From: Rory Mitchell Date: Wed, 26 Mar 2025 03:46:13 -0700 Subject: [PATCH 03/21] Implement neural network onnx op --- legateboost/models/krr.py | 4 +- legateboost/models/linear.py | 4 
+- legateboost/models/nn.py | 84 ++++++++++++++++++++++++++++++++++++ 3 files changed, 90 insertions(+), 2 deletions(-) diff --git a/legateboost/models/krr.py b/legateboost/models/krr.py index 6e6bd0ca..3f35ec5b 100644 --- a/legateboost/models/krr.py +++ b/legateboost/models/krr.py @@ -347,7 +347,9 @@ def make_constant_node(value, name): # prediction # pred = np.dot(K, self.betas_) nodes.append(make_node("MatMul", ["K", "betas"], ["pred"])) - graph = make_graph(nodes, "krr", [X], [pred], [betas, X_train]) + graph = make_graph( + nodes, "legateboost.model.KRR", [X], [pred], [betas, X_train] + ) onnx_model = make_model(graph) check_model(onnx_model) return onnx_model diff --git a/legateboost/models/linear.py b/legateboost/models/linear.py index 98a0b776..ec34594e 100644 --- a/legateboost/models/linear.py +++ b/legateboost/models/linear.py @@ -179,7 +179,9 @@ def to_onnx(self) -> Any: node1 = make_node("MatMul", ["X", "betas"], ["XBeta"]) node2 = make_node("Add", ["XBeta", "intercept"], ["pred"]) - graph = make_graph([node1, node2], "lr", [X], [pred], [betas, intercept]) + graph = make_graph( + [node1, node2], "legateboost.model.Linear", [X], [pred], [betas, intercept] + ) onnx_model = make_model(graph) check_model(onnx_model) return onnx_model diff --git a/legateboost/models/nn.py b/legateboost/models/nn.py index eb499b0d..356f0264 100644 --- a/legateboost/models/nn.py +++ b/legateboost/models/nn.py @@ -181,3 +181,87 @@ def __mul__(self, scalar: Any) -> "NN": new.coefficients_[-1] *= scalar new.biases_[-1] *= scalar return new + + def to_onnx(self) -> Any: + from onnx import numpy_helper + from onnx.checker import check_model + from onnx.helper import ( + make_graph, + make_model, + make_node, + make_tensor_value_info, + np_dtype_to_tensor_dtype, + ) + + # model constants + biases = [ + numpy_helper.from_array(b[0].__array__(), name=f"bias{i}") + for i, b in enumerate(self.biases_) + ] + coefficients = [ + numpy_helper.from_array(c.__array__(), 
name=f"coefficients{i}") + for i, c in enumerate(self.coefficients_) + ] + + # pred inputs + X = make_tensor_value_info( + "X", + np_dtype_to_tensor_dtype(self.coefficients_[0].dtype), + [None, self.coefficients_[0].shape[0]], + ) + + nodes = [] + + make_tensor_value_info( + "activations0", + np_dtype_to_tensor_dtype(self.coefficients_[0].dtype), + [None, None], + ) + nodes.append(make_node("MatMul", ["X", "coefficients0"], ["activations0"])) + activations_with_bias = make_tensor_value_info( + "activations0withbias", + np_dtype_to_tensor_dtype(self.coefficients_[0].dtype), + [None, None], + ) + nodes.append( + make_node("Add", ["activations0", "bias0"], ["activations0withbias"]) + ) + + for i in range(1, len(coefficients)): + make_tensor_value_info( + f"tanh{i}", + np_dtype_to_tensor_dtype(self.coefficients_[0].dtype), + [None, None], + ) + nodes.append(make_node("Tanh", [f"activations{i-1}withbias"], [f"tanh{i}"])) + make_tensor_value_info( + f"activations{i}", + np_dtype_to_tensor_dtype(self.coefficients_[0].dtype), + [None, None], + ) + nodes.append( + make_node( + "MatMul", [f"tanh{i}", f"coefficients{i}"], [f"activations{i}"] + ) + ) + activations_with_bias = make_tensor_value_info( + f"activations{i}withbias", + np_dtype_to_tensor_dtype(self.coefficients_[0].dtype), + [None, None], + ) + nodes.append( + make_node( + "Add", [f"activations{i}", f"bias{i}"], [f"activations{i}withbias"] + ) + ) + + graph = make_graph( + nodes, + "legateboost.model.NN", + [X], + [activations_with_bias], + biases + coefficients, + ) + onnx_model = make_model(graph) + check_model(onnx_model) + return onnx_model From 821e7f48c31c4ec6e2669b785bfa602e1a58c6de Mon Sep 17 00:00:00 2001 From: Rory Mitchell Date: Wed, 26 Mar 2025 04:01:42 -0700 Subject: [PATCH 04/21] mypy --- conda/environments/all_cuda-122.yaml | 2 ++ dependencies.yaml | 2 ++ legateboost/models/krr.py | 4 +++- legateboost/models/tree.py | 6 +++--- pyproject.toml | 2 ++ 5 files changed, 12 insertions(+), 4 deletions(-) 
diff --git a/conda/environments/all_cuda-122.yaml b/conda/environments/all_cuda-122.yaml index f84146d2..68eb3824 100644 --- a/conda/environments/all_cuda-122.yaml +++ b/conda/environments/all_cuda-122.yaml @@ -28,6 +28,8 @@ dependencies: - ninja>=1.11.1.1 - notebook>=7 - numpy +- onnx>=1.10 +- onnxmltools>=1.10 - openblas - pydata-sphinx-theme>=0.16 - pytest>=7,<8 diff --git a/dependencies.yaml b/dependencies.yaml index 78b33351..9cd93fba 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -176,3 +176,5 @@ dependencies: - pytest>=7,<8 - seaborn>=0.13 - xgboost>=2.0 + - onnx>=1.10 + - onnxmltools>=1.10 diff --git a/legateboost/models/krr.py b/legateboost/models/krr.py index 3f35ec5b..31af8d5a 100644 --- a/legateboost/models/krr.py +++ b/legateboost/models/krr.py @@ -256,7 +256,7 @@ def to_onnx(self) -> Any: assert self.X_train.dtype == self.betas_.dtype - def make_constant_node(value, name): + def make_constant_node(value: cn.array, name: str) -> Any: return make_node( "Constant", inputs=[], @@ -331,6 +331,8 @@ def make_constant_node(value, name): np_dtype_to_tensor_dtype(self.betas_.dtype), [None, self.X_train.shape[0]], ) + if self.sigma is None: + raise ValueError("sigma is None. 
Has fit been called?") nodes.append( make_constant_node( np.array([-2.0 * self.sigma**2], self.betas_.dtype), "denominator" diff --git a/legateboost/models/tree.py b/legateboost/models/tree.py index 9a13a163..38cfda93 100644 --- a/legateboost/models/tree.py +++ b/legateboost/models/tree.py @@ -92,13 +92,13 @@ def __init__( def num_nodes(self) -> int: return int(cn.sum(self.hessian > 0.0)) - def is_leaf(self, id: int) -> Any: + def is_leaf(self, id: cn.array) -> cn.array: return self.feature[id] == -1 - def left_child(self, id) -> int: + def left_child(self, id: cn.array) -> cn.array: return id * 2 + 1 - def right_child(self, id) -> int: + def right_child(self, id: cn.array) -> cn.array: return id * 2 + 2 def fit( diff --git a/pyproject.toml b/pyproject.toml index 9b68d931..a69041c6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -49,6 +49,8 @@ test = [ "mypy>=1.13", "nbconvert>=7.16", "notebook>=7", + "onnx>=1.10", + "onnxmltools>=1.10", "pytest>=7,<8", "seaborn>=0.13", "xgboost>=2.0", From b9be42ee492388c4b80371e51a8fcee7177a88d6 Mon Sep 17 00:00:00 2001 From: Rory Mitchell Date: Mon, 31 Mar 2025 07:31:55 -0700 Subject: [PATCH 05/21] Add interface to estimator --- legateboost/legateboost.py | 96 +++++++++++++++++++++++++++++++++++ legateboost/models/krr.py | 91 ++++++++++++++------------------- legateboost/models/linear.py | 45 ++++++++++++---- legateboost/models/nn.py | 76 +++++++++++++++------------ legateboost/models/tree.py | 55 +++++++++++++------- legateboost/test/test_onnx.py | 65 +++++++++++++++++++----- 6 files changed, 302 insertions(+), 126 deletions(-) diff --git a/legateboost/legateboost.py b/legateboost/legateboost.py index 80c8beb2..2c095a53 100644 --- a/legateboost/legateboost.py +++ b/legateboost/legateboost.py @@ -540,6 +540,102 @@ def dump_models(self) -> str: text += str(m) return text + def _make_onnx_init(self, X_dtype): + # turn self.model_init_ into an ONNX model + from onnx import numpy_helper + from onnx.checker import check_model + 
from onnx.helper import ( + make_graph, + make_model, + make_node, + make_opsetid, + make_tensor_value_info, + np_dtype_to_tensor_dtype, + ) + + # model constants + X_in = make_tensor_value_info( + "X_in", np_dtype_to_tensor_dtype(X_dtype), [None, self.n_features_in_] + ) + nodes = [] + nodes.append(make_node("Shape", ["X_in"], ["n_rows"], end=1)) + one = numpy_helper.from_array(np.array([1], dtype=np.int64), name="one") + nodes.append(make_node("Concat", ["n_rows", "one"], ["tile_repeat"], axis=0)) + init = numpy_helper.from_array( + np.atleast_2d(self.model_init_.__array__().astype(X_dtype)), name="init" + ) + prediction_out = make_tensor_value_info( + "predictions_out", + np_dtype_to_tensor_dtype(X_dtype), + [None, self.model_init_.shape[0]], + ) + nodes.append(make_node("Tile", ["init", "tile_repeat"], ["predictions_out"])) + X_out = make_tensor_value_info( + "X_out", + np_dtype_to_tensor_dtype(X_dtype), + [None, self.model_init_.shape[0]], + ) + nodes.append(make_node("Identity", ["X_in"], ["X_out"])) + graph = make_graph( + nodes, + "legateboost estimator init", + [X_in], + [X_out, prediction_out], + [init, one], + ) + onnx_model = make_model( + graph, + opset_imports=[ + make_opsetid("", 21), + ], + ) + check_model(onnx_model) + + return onnx_model + + def to_onnx(self, X_dtype, predict_function="predict"): + """Converts the model to an ONNX model. + + Parameters + ---------- + X_dtype : numpy.dtype + The expected data type of the input data. ONNX models hard + code the data type of the input data and will crash if this is + not set correctly. + Can be np.float32 or np.float64. + predict_function : str + The serialised ONNX model can produce output equivalent to 'predict', + 'predict_proba', or 'predict_raw'. + The default is "predict". + Returns + ------- + Any + The ONNX model. 
+ """ + from onnx.compose import merge_models + + model = self._make_onnx_init(X_dtype) + if self.models_ is not None and len(self.models_) > 0: + model = merge_models( + model, + self.models_[0].to_onnx(X_dtype), + io_map=[("X_out", "X_in"), ("predictions_out", "predictions_in")], + prefix2="model_0_", + ) + + for i in range(1, len(self.models_)): + model = merge_models( + model, + self.models_[i].to_onnx(X_dtype), + io_map=[ + ("model_{}_X_out".format(i - 1), "X_in"), + ("model_{}_predictions_out".format(i - 1), "predictions_in"), + ], + prefix2="model_{}_".format(i), + ) + + return model + def global_attributions( self, X: cn.array, diff --git a/legateboost/models/krr.py b/legateboost/models/krr.py index 31af8d5a..cbea5a22 100644 --- a/legateboost/models/krr.py +++ b/legateboost/models/krr.py @@ -243,13 +243,14 @@ def __mul__(self, scalar: Any) -> "KRR": self.betas_ *= scalar return new - def to_onnx(self) -> Any: + def to_onnx(self, X_dtype) -> Any: from onnx import numpy_helper from onnx.checker import check_model from onnx.helper import ( make_graph, make_model, make_node, + make_opsetid, make_tensor_value_info, np_dtype_to_tensor_dtype, ) @@ -271,66 +272,34 @@ def make_constant_node(value: cn.array, name: str) -> Any: X_train = numpy_helper.from_array(self.X_train.__array__(), name="X_train") # pred inputs - X = make_tensor_value_info( - "X", - np_dtype_to_tensor_dtype(self.betas_.dtype), - [None, self.X_train.shape[1]], + n_features = self.X_train.shape[1] + n_outputs = self.betas_.shape[1] + X_in = make_tensor_value_info( + "X_in", np_dtype_to_tensor_dtype(self.betas_.dtype), [None, n_features] ) - pred = make_tensor_value_info( - "pred", + predictions_in = make_tensor_value_info( + "predictions_in", np_dtype_to_tensor_dtype(self.betas_.dtype), - [None, self.betas_.shape[1]], + [None, n_outputs], ) - # exanded l2 distance # distance = np.sum(X**2, axis=1)[:, np.newaxis] - 2 * np.dot(X, self.X_train.T) # + np.sum(self.X_train**2, axis=1) - 
make_tensor_value_info( - "XX", np_dtype_to_tensor_dtype(self.betas_.dtype), [None] - ) - make_tensor_value_info( - "YY", - np_dtype_to_tensor_dtype(self.betas_.dtype), - [self.X_train.shape[0], 1], - ) - make_tensor_value_info( - "XY_reshaped", - np_dtype_to_tensor_dtype(self.betas_.dtype), - [1, self.X_train.shape[0]], - ) - make_tensor_value_info( - "XY", - np_dtype_to_tensor_dtype(self.betas_.dtype), - [None, self.X_train.shape[0]], - ) nodes.append(make_constant_node(np.array([1]), "axis1")) - nodes.append(make_node("ReduceSumSquare", ["X", "axis1"], ["XX"])) - nodes.append(make_node("Gemm", ["X", "X_train"], ["XY"], alpha=-2.0, transB=1)) + nodes.append(make_node("ReduceSumSquare", ["X_in", "axis1"], ["XX"])) + nodes.append( + make_node("Gemm", ["X_in", "X_train"], ["XY"], alpha=-2.0, transB=1) + ) nodes.append(make_node("ReduceSumSquare", ["X_train", "axis1"], ["YY"])) nodes.append(make_constant_node(np.array([1, -1]), "reshape")) nodes.append(make_node("Reshape", ["YY", "reshape"], ["YY_reshaped"])) nodes.append(make_node("Add", ["XX", "XY"], ["add0"])) - make_tensor_value_info( - "l2", - np_dtype_to_tensor_dtype(self.betas_.dtype), - [None, self.X_train.shape[0]], - ) nodes.append(make_node("Add", ["YY_reshaped", "add0"], ["l2"])) nodes.append(make_constant_node(np.array([0.0], self.betas_.dtype), "zero")) - make_tensor_value_info( - "l2_clipped", - np_dtype_to_tensor_dtype(self.betas_.dtype), - [None, self.X_train.shape[0]], - ) nodes.append(make_node("Max", ["l2", "zero"], ["l2_clipped"])) # RBF kernel # K = np.exp(-distance / (2 * self.sigma**2)) - make_tensor_value_info( - "rbf0", - np_dtype_to_tensor_dtype(self.betas_.dtype), - [None, self.X_train.shape[0]], - ) if self.sigma is None: raise ValueError("sigma is None. 
Has fit been called?") nodes.append( @@ -339,19 +308,37 @@ def make_constant_node(value: cn.array, name: str) -> Any: ) ) nodes.append(make_node("Div", ["l2_clipped", "denominator"], ["rbf0"])) - make_tensor_value_info( - "K", - np_dtype_to_tensor_dtype(self.betas_.dtype), - [None, self.X_train.shape[0]], - ) nodes.append(make_node("Exp", ["rbf0"], ["K"])) # prediction # pred = np.dot(K, self.betas_) - nodes.append(make_node("MatMul", ["K", "betas"], ["pred"])) + nodes.append(make_node("MatMul", ["K", "betas"], ["dot"])) + + # outputs + predictions_out = make_tensor_value_info( + "predictions_out", + np_dtype_to_tensor_dtype(self.betas_.dtype), + [None, n_outputs], + ) + X_out = make_tensor_value_info( + "X_out", np_dtype_to_tensor_dtype(self.betas_.dtype), [None, n_features] + ) + + nodes.append(make_node("Add", ["dot", "predictions_in"], ["predictions_out"])) + nodes.append(make_node("Identity", ["X_in"], ["X_out"])) + graph = make_graph( - nodes, "legateboost.model.KRR", [X], [pred], [betas, X_train] + nodes, + "legateboost.model.KRR", + [X_in, predictions_in], + [X_out, predictions_out], + [betas, X_train], + ) + onnx_model = make_model( + graph, + opset_imports=[ + make_opsetid("", 21), + ], ) - onnx_model = make_model(graph) check_model(onnx_model) return onnx_model diff --git a/legateboost/models/linear.py b/legateboost/models/linear.py index ec34594e..90e1ee4c 100644 --- a/legateboost/models/linear.py +++ b/legateboost/models/linear.py @@ -152,13 +152,14 @@ def __mul__(self, scalar: Any) -> "Linear": new.betas_ *= scalar return new - def to_onnx(self) -> Any: + def to_onnx(self, X_dtype) -> Any: from onnx import numpy_helper from onnx.checker import check_model from onnx.helper import ( make_graph, make_model, make_node, + make_opsetid, make_tensor_value_info, np_dtype_to_tensor_dtype, ) @@ -170,18 +171,44 @@ def to_onnx(self) -> Any: ) # pred inputs - X = make_tensor_value_info( - "X", np_dtype_to_tensor_dtype(self.betas_.dtype), [None, None] + n_features 
= self.betas_.shape[0] - 1 + n_outputs = self.betas_.shape[1] + X_in = make_tensor_value_info( + "X_in", np_dtype_to_tensor_dtype(self.betas_.dtype), [None, n_features] ) - pred = make_tensor_value_info( - "pred", np_dtype_to_tensor_dtype(self.betas_.dtype), [None] + predictions_in = make_tensor_value_info( + "predictions_in", + np_dtype_to_tensor_dtype(self.betas_.dtype), + [None, n_outputs], + ) + predictions_out = make_tensor_value_info( + "predictions_out", + np_dtype_to_tensor_dtype(self.betas_.dtype), + [None, n_outputs], ) - node1 = make_node("MatMul", ["X", "betas"], ["XBeta"]) - node2 = make_node("Add", ["XBeta", "intercept"], ["pred"]) + nodes = [] + nodes.append(make_node("MatMul", ["X_in", "betas"], ["XBeta"])) + nodes.append(make_node("Add", ["XBeta", "intercept"], ["result"])) + nodes.append( + make_node("Add", ["result", "predictions_in"], ["predictions_out"]) + ) + X_out = make_tensor_value_info( + "X_out", np_dtype_to_tensor_dtype(self.betas_.dtype), [None, n_features] + ) + nodes.append(make_node("Identity", ["X_in"], ["X_out"])) graph = make_graph( - [node1, node2], "legateboost.model.Linear", [X], [pred], [betas, intercept] + nodes, + "legateboost.model.Linear", + [X_in, predictions_in], + [X_out, predictions_out], + [betas, intercept], + ) + onnx_model = make_model( + graph, + opset_imports=[ + make_opsetid("", 21), + ], ) - onnx_model = make_model(graph) check_model(onnx_model) return onnx_model diff --git a/legateboost/models/nn.py b/legateboost/models/nn.py index 356f0264..d77e4b24 100644 --- a/legateboost/models/nn.py +++ b/legateboost/models/nn.py @@ -182,13 +182,14 @@ def __mul__(self, scalar: Any) -> "NN": new.biases_[-1] *= scalar return new - def to_onnx(self) -> Any: + def to_onnx(self, X_dtype) -> Any: from onnx import numpy_helper from onnx.checker import check_model from onnx.helper import ( make_graph, make_model, make_node, + make_opsetid, make_tensor_value_info, np_dtype_to_tensor_dtype, ) @@ -204,64 +205,73 @@ def to_onnx(self) 
-> Any: ] # pred inputs - X = make_tensor_value_info( - "X", + n_outputs = self.coefficients_[-1].shape[1] + n_features = self.coefficients_[0].shape[0] + X_in = make_tensor_value_info( + "X_in", np_dtype_to_tensor_dtype(self.coefficients_[0].dtype), [None, self.coefficients_[0].shape[0]], ) - - nodes = [] - - make_tensor_value_info( - "activations0", - np_dtype_to_tensor_dtype(self.coefficients_[0].dtype), - [None, None], - ) - nodes.append(make_node("MatMul", ["X", "coefficients0"], ["activations0"])) - activations_with_bias = make_tensor_value_info( - "activations0withbias", + predictions_in = make_tensor_value_info( + "predictions_in", np_dtype_to_tensor_dtype(self.coefficients_[0].dtype), - [None, None], + [None, n_outputs], ) + nodes = [] + + nodes.append(make_node("MatMul", ["X_in", "coefficients0"], ["activations0"])) nodes.append( make_node("Add", ["activations0", "bias0"], ["activations0withbias"]) ) for i in range(1, len(coefficients)): - make_tensor_value_info( - f"tanh{i}", - np_dtype_to_tensor_dtype(self.coefficients_[0].dtype), - [None, None], - ) nodes.append(make_node("Tanh", [f"activations{i-1}withbias"], [f"tanh{i}"])) - make_tensor_value_info( - f"activations{i}", - np_dtype_to_tensor_dtype(self.coefficients_[0].dtype), - [None, None], - ) nodes.append( make_node( "MatMul", [f"tanh{i}", f"coefficients{i}"], [f"activations{i}"] ) ) - activations_with_bias = make_tensor_value_info( - f"activations{i}withbias", - np_dtype_to_tensor_dtype(self.coefficients_[0].dtype), - [None, None], - ) nodes.append( make_node( "Add", [f"activations{i}", f"bias{i}"], [f"activations{i}withbias"] ) ) + # outputs + X_out = make_tensor_value_info( + "X_out", + np_dtype_to_tensor_dtype(self.coefficients_[0].dtype), + [None, n_features], + ) + nodes.append(make_node("Identity", ["X_in"], ["X_out"])) + predictions_out = make_tensor_value_info( + "predictions_out", + np_dtype_to_tensor_dtype(self.coefficients_[0].dtype), + [None, n_outputs], + ) + nodes.append( + 
make_node( + "Add", + [ + "activations{}withbias".format(len(self.coefficients_) - 1), + "predictions_in", + ], + ["predictions_out"], + ) + ) + graph = make_graph( nodes, "legateboost.model.NN", - [X], - [activations_with_bias], + [X_in, predictions_in], + [X_out, predictions_out], biases + coefficients, ) - onnx_model = make_model(graph) + onnx_model = make_model( + graph, + opset_imports=[ + make_opsetid("", 21), + ], + ) check_model(onnx_model) return onnx_model diff --git a/legateboost/models/tree.py b/legateboost/models/tree.py index 38cfda93..c9756992 100644 --- a/legateboost/models/tree.py +++ b/legateboost/models/tree.py @@ -316,15 +316,17 @@ def __mul__(self, scalar: Any) -> "Tree": new.leaf_value *= scalar return new - def to_onnx(self) -> Any: + def to_onnx(self, X_dtype) -> Any: import onnx from onnx import numpy_helper from onnx.checker import check_model from onnx.helper import ( make_graph, make_model, + make_node, make_tensor, make_tensor_value_info, + np_dtype_to_tensor_dtype, ) onnx_nodes = [] @@ -345,7 +347,9 @@ def to_onnx(self) -> Any: tree_max_nodes = self.feature.size all_nodes_idx = np.arange(tree_max_nodes) nodes_featureids = self.feature.__array__() - nodes_splits = numpy_helper.from_array(self.split_value.__array__()) + nodes_splits = numpy_helper.from_array( + self.split_value.__array__().astype(X_dtype) + ) nodes_truenodeids = self.left_child(all_nodes_idx) # get the left child of each node and check if it is a leaf # if the node is already leaf then its child can go off the end of the array @@ -357,17 +361,17 @@ def to_onnx(self) -> Any: nodes_falseleafs = self.is_leaf( np.minimum(tree_max_nodes - 1, self.right_child(all_nodes_idx)) ).astype(int) - - for output_idx in range(0, self.leaf_value.shape[1]): + num_outputs = self.leaf_value.shape[1] + for output_idx in range(0, num_outputs): leaf_targetids = np.full(self.feature.size, output_idx, dtype=np.int64) leaf_weights = numpy_helper.from_array( - self.leaf_value[:, 
output_idx].__array__() + self.leaf_value[:, output_idx].__array__().astype(X_dtype) ) onnx_nodes.append( - onnx.helper.make_node( + make_node( "TreeEnsemble", - ["X"], + ["X_in"], ["pred" + str(output_idx)], domain="ai.onnx.ml", n_targets=self.leaf_value.shape[1], @@ -395,9 +399,6 @@ def to_onnx(self) -> Any: ) if output_idx == 0: - accumulated_pred = make_tensor_value_info( - "accumulated_pred0", onnx.TensorProto.DOUBLE, [None, None] - ) onnx_nodes.append( onnx.helper.make_node( "Identity", @@ -406,11 +407,6 @@ def to_onnx(self) -> Any: ) ) else: - accumulated_pred = make_tensor_value_info( - "accumulated_pred" + str(output_idx), - onnx.TensorProto.DOUBLE, - [None, None], - ) onnx_nodes.append( onnx.helper.make_node( "Add", @@ -422,16 +418,37 @@ def to_onnx(self) -> Any: ) ) - # pred inputs - X = make_tensor_value_info("X", onnx.TensorProto.DOUBLE, [None, None]) + X_in = make_tensor_value_info( + "X_in", np_dtype_to_tensor_dtype(X_dtype), [None, None] + ) + X_out = make_tensor_value_info( + "X_out", np_dtype_to_tensor_dtype(X_dtype), [None, None] + ) + predictions_in = make_tensor_value_info( + "predictions_in", np_dtype_to_tensor_dtype(X_dtype), [None, num_outputs] + ) + predictions_out = make_tensor_value_info( + "predictions_out", np_dtype_to_tensor_dtype(X_dtype), [None, num_outputs] + ) + onnx_nodes.append(make_node("Identity", ["X_in"], ["X_out"])) + onnx_nodes.append( + make_node( + "Add", + ["predictions_in", "accumulated_pred" + str(num_outputs - 1)], + ["predictions_out"], + ) + ) graph = make_graph( - onnx_nodes, "legateboost.models.Tree", [X], [accumulated_pred] + onnx_nodes, + "legateboost.models.Tree", + [X_in, predictions_in], + [X_out, predictions_out], ) model = make_model( graph, opset_imports=[ onnx.helper.make_opsetid("ai.onnx.ml", 5), - onnx.helper.make_opsetid("", 14), + onnx.helper.make_opsetid("", 21), ], ) check_model(model) diff --git a/legateboost/test/test_onnx.py b/legateboost/test/test_onnx.py index 8710eaf4..f52ea1b5 100644 --- 
a/legateboost/test/test_onnx.py +++ b/legateboost/test/test_onnx.py @@ -1,17 +1,30 @@ import numpy as np +import onnxruntime as ort import pytest -from onnx.reference import ReferenceEvaluator import cupynumeric as cn import legateboost as lb -@pytest.mark.parametrize( - "Model", [M for M in lb.models.BaseModel.__subclasses__() if hasattr(M, "to_onnx")] -) +def pred_onnx_estimator(onnx, X, n_outputs): + sess = ort.InferenceSession(onnx.SerializeToString()) + feeds = {"X_in": X} + return sess.run(None, feeds)[1] + + +def pred_onnx_model(onnx, X, n_outputs): + sess = ort.InferenceSession(onnx.SerializeToString()) + feeds = { + "X_in": X, + "predictions_in": np.zeros((X.shape[0], n_outputs), dtype=X.dtype), + } + return sess.run(None, feeds)[1] + + +@pytest.mark.parametrize("Model", [M for M in lb.models.BaseModel.__subclasses__()]) @pytest.mark.parametrize("n_outputs", [1, 5]) @pytest.mark.parametrize("dtype", [np.float32, np.float64]) -def test_onnx(Model, n_outputs, dtype): +def test_models(Model, n_outputs, dtype): rs = np.random.RandomState(0) X = rs.random((1000, 10)).astype(dtype) g = rs.normal(size=(X.shape[0], n_outputs)) @@ -22,14 +35,40 @@ def test_onnx(Model, n_outputs, dtype): .fit(cn.array(X), cn.array(g), cn.array(h)) ) - def pred_onnx(onnx, X): - sess = ReferenceEvaluator(onnx) - pred = np.empty(X.shape[0], dtype=dtype) - feeds = {"X": X, "pred": pred} - return sess.run(None, feeds) + onnx_pred = pred_onnx_model(model.to_onnx(X.dtype), X, n_outputs) + lb_pred = model.predict(cn.array(X)) + assert onnx_pred.shape == lb_pred.shape + assert np.allclose(onnx_pred, lb_pred, atol=1e-3 if dtype == np.float32 else 1e-6) + + +@pytest.mark.parametrize("n_outputs", [1, 5]) +def test_init(n_outputs): + # ONNX correctly outputs model init + X = np.array([[1, 2], [3, 4], [5, 6]], dtype=np.float32) + y = np.full((3, n_outputs), 5.0, dtype=np.float32) + estimator = lb.LBRegressor(n_estimators=0, random_state=0).fit(X, y) + assert np.all(estimator.model_init_ == 5.0) + 
assert np.all(estimator.predict(X) == 5.0) + assert np.all( + pred_onnx_estimator(estimator.to_onnx(X.dtype), X.__array__(), 1) == 5.0 + ) + + +@pytest.mark.parametrize("Model", [M for M in lb.models.BaseModel.__subclasses__()]) +@pytest.mark.parametrize("n_outputs", [1, 5]) +@pytest.mark.parametrize("dtype", [np.float32, np.float64]) +def test_estimator(Model, n_outputs, dtype): + rs = np.random.RandomState(0) + X = rs.random((1000, 10)).astype(dtype) + y = rs.random((1000, n_outputs)).astype(dtype) + model = lb.LBRegressor( + n_estimators=10, + base_models=(Model(),), + random_state=0, + ).fit(X, y) assert np.allclose( - model.predict(cn.array(X)), - pred_onnx(model.to_onnx(), X)[0], - atol=1e-3 if dtype == np.float32 else 1e-6, + model.predict(X), + pred_onnx_estimator(model.to_onnx(X.dtype), X.__array__(), 1).squeeze(), + atol=1e-3, ) From 5e3a0b69acd6802ea72349a59616492621528f74 Mon Sep 17 00:00:00 2001 From: Rory Mitchell Date: Mon, 31 Mar 2025 07:32:47 -0700 Subject: [PATCH 06/21] Add interface to estimator --- conda/environments/all_cuda-122.yaml | 1 + dependencies.yaml | 1 + pyproject.toml | 1 + 3 files changed, 3 insertions(+) diff --git a/conda/environments/all_cuda-122.yaml b/conda/environments/all_cuda-122.yaml index 68eb3824..cf9bd702 100644 --- a/conda/environments/all_cuda-122.yaml +++ b/conda/environments/all_cuda-122.yaml @@ -30,6 +30,7 @@ dependencies: - numpy - onnx>=1.10 - onnxmltools>=1.10 +- onnxruntime>=1.21 - openblas - pydata-sphinx-theme>=0.16 - pytest>=7,<8 diff --git a/dependencies.yaml b/dependencies.yaml index 9cd93fba..d7501dbd 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -178,3 +178,4 @@ dependencies: - xgboost>=2.0 - onnx>=1.10 - onnxmltools>=1.10 + - onnxruntime>=1.21 diff --git a/pyproject.toml b/pyproject.toml index a69041c6..f6ab85c1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -51,6 +51,7 @@ test = [ "notebook>=7", "onnx>=1.10", "onnxmltools>=1.10", + "onnxruntime>=1.21", "pytest>=7,<8", "seaborn>=0.13", 
"xgboost>=2.0", From 7bf50187015654e49bb15a71fa69449a48053990 Mon Sep 17 00:00:00 2001 From: Rory Mitchell Date: Wed, 2 Apr 2025 01:54:45 -0700 Subject: [PATCH 07/21] Increase test coverage --- legateboost/legateboost.py | 52 +++++++----- legateboost/models/tree.py | 8 ++ legateboost/objectives.py | 13 ++- legateboost/test/test_onnx.py | 104 ++++++++++++++++------- legateboost/test/test_with_hypothesis.py | 51 ++++++++--- 5 files changed, 160 insertions(+), 68 deletions(-) diff --git a/legateboost/legateboost.py b/legateboost/legateboost.py index 2c095a53..016bb77e 100644 --- a/legateboost/legateboost.py +++ b/legateboost/legateboost.py @@ -16,7 +16,7 @@ from .input_validation import _lb_check_X, _lb_check_X_y, check_sample_weight from .metrics import BaseMetric, metrics from .models import BaseModel, Tree -from .objectives import BaseObjective, objectives +from .objectives import OBJECTIVES_MAP, BaseObjective from .shapley import global_shapley_attributions, local_shapley_attributions from .utils import AddableMixin, AddMember, PickleCupynumericMixin @@ -422,7 +422,7 @@ def fit( # setup objective if isinstance(self.objective, str): - self._objective_instance = objectives[self.objective]() + self._objective_instance = OBJECTIVES_MAP[self.objective]() elif isinstance(self.objective, BaseObjective): self._objective_instance = self.objective else: @@ -528,6 +528,26 @@ def _predict(self, X: cn.ndarray) -> cn.ndarray: pred += Type.batch_predict(models, X) return pred + def predict_raw(self, X: cn.ndarray) -> cn.ndarray: + """Predict pre-transformed values for samples in X. E.g. before applying a + sigmoid function. + + Parameters + ---------- + + X : + The input samples. + + Returns + ------- + + y : + The predicted raw values for each sample in X. + """ + X = _lb_check_X(X) + validate_data(self, X, reset=False, skip_check_array=True) + return self._predict(X) + def dump_models(self) -> str: """Dumps the models in the current instance to a string. 
@@ -573,7 +593,7 @@ def _make_onnx_init(self, X_dtype): X_out = make_tensor_value_info( "X_out", np_dtype_to_tensor_dtype(X_dtype), - [None, self.model_init_.shape[0]], + [None, None], ) nodes.append(make_node("Identity", ["X_in"], ["X_out"])) graph = make_graph( @@ -612,6 +632,7 @@ def to_onnx(self, X_dtype, predict_function="predict"): Any The ONNX model. """ + from onnx.checker import check_model from onnx.compose import merge_models model = self._make_onnx_init(X_dtype) @@ -634,6 +655,11 @@ def to_onnx(self, X_dtype, predict_function="predict"): prefix2="model_{}_".format(i), ) + # remove the X_out output, we only need the predictions + # add a transform operator + model.graph.output.remove(model.graph.output[0]) + + check_model(model) return model def global_attributions( @@ -1127,26 +1153,6 @@ def fit( ) return self - def predict_raw(self, X: cn.ndarray) -> cn.ndarray: - """Predict pre-transformed values for samples in X. E.g. before applying a - sigmoid function. - - Parameters - ---------- - - X : - The input samples. - - Returns - ------- - - y : - The predicted raw values for each sample in X. - """ - X = _lb_check_X(X) - validate_data(self, X, reset=False, skip_check_array=True) - return super()._predict(X) - def predict_proba(self, X: cn.ndarray) -> cn.ndarray: """Predict class probabilities for samples in X. 
diff --git a/legateboost/models/tree.py b/legateboost/models/tree.py index c9756992..39e32877 100644 --- a/legateboost/models/tree.py +++ b/legateboost/models/tree.py @@ -361,6 +361,14 @@ def to_onnx(self, X_dtype) -> Any: nodes_falseleafs = self.is_leaf( np.minimum(tree_max_nodes - 1, self.right_child(all_nodes_idx)) ).astype(int) + if self.is_leaf(0): + # we have a decision stump + # according to the onnx operator we must set + # true/false at root to the leaf at 0 + nodes_falsenodeids[0] = 0 + nodes_truenodeids[0] = 0 + nodes_trueleafs[0] = 0 + nodes_falseleafs[0] = 0 num_outputs = self.leaf_value.shape[1] for output_idx in range(0, num_outputs): leaf_targetids = np.full(self.feature.size, output_idx, dtype=np.int64) diff --git a/legateboost/objectives.py b/legateboost/objectives.py index ea389660..e48b7054 100644 --- a/legateboost/objectives.py +++ b/legateboost/objectives.py @@ -628,7 +628,7 @@ def initialise_prediction( return self.one_step_newton(y, w, boost_from_average, init) -objectives = { +OBJECTIVES_MAP = { "squared_error": SquaredErrorObjective, "normal": NormalObjective, "log_loss": LogLossObjective, @@ -638,3 +638,14 @@ def initialise_prediction( "gamma_deviance": GammaDevianceObjective, "gamma": GammaObjective, } + +REGRESSION_OBJECTIVES = ["squared_error", "normal", "gamma_deviance", "gamma"] + +CLASSIFICATION_OBJECTIVES = [ + "log_loss", + "multi_label", + "exp", + "quantile", + "gamma_deviance", + "gamma", +] diff --git a/legateboost/test/test_onnx.py b/legateboost/test/test_onnx.py index f52ea1b5..8a33b743 100644 --- a/legateboost/test/test_onnx.py +++ b/legateboost/test/test_onnx.py @@ -6,39 +6,48 @@ import legateboost as lb -def pred_onnx_estimator(onnx, X, n_outputs): - sess = ort.InferenceSession(onnx.SerializeToString()) - feeds = {"X_in": X} - return sess.run(None, feeds)[1] - - -def pred_onnx_model(onnx, X, n_outputs): - sess = ort.InferenceSession(onnx.SerializeToString()) +def compare_onnx_predictions(estimator, X): + sess = 
ort.InferenceSession(estimator.to_onnx(X.dtype).SerializeToString()) feeds = { "X_in": X, - "predictions_in": np.zeros((X.shape[0], n_outputs), dtype=X.dtype), } - return sess.run(None, feeds)[1] + if isinstance(estimator, lb.models.BaseModel): + pred = estimator.predict(cn.array(X)) + feeds["predictions_in"] = np.zeros((X.shape[0], pred.shape[1]), dtype=X.dtype) + onnx_pred = sess.run(None, feeds)[1] + else: + pred = estimator.predict_raw(cn.array(X)) + onnx_pred = sess.run(None, feeds)[0] + onnx_pred = onnx_pred.squeeze() + pred = pred.squeeze() + assert pred.shape == onnx_pred.shape + assert np.allclose( + onnx_pred, pred, atol=1e-3 if X.dtype == np.float32 else 1e-6 + ), np.linalg.norm(pred - onnx_pred) -@pytest.mark.parametrize("Model", [M for M in lb.models.BaseModel.__subclasses__()]) -@pytest.mark.parametrize("n_outputs", [1, 5]) -@pytest.mark.parametrize("dtype", [np.float32, np.float64]) -def test_models(Model, n_outputs, dtype): + +@pytest.fixture +def model_dataset(dtype, n_outputs): rs = np.random.RandomState(0) X = rs.random((1000, 10)).astype(dtype) g = rs.normal(size=(X.shape[0], n_outputs)) h = rs.random(g.shape) + 0.1 + return X, g, h + + +@pytest.mark.parametrize("Model", [M for M in lb.models.BaseModel.__subclasses__()]) +@pytest.mark.parametrize("dtype", [np.float32, np.float64]) +@pytest.mark.parametrize("n_outputs", [1, 5]) +def test_models(Model, model_dataset): + X, g, h = model_dataset model = ( Model() .set_random_state(np.random.RandomState(2)) .fit(cn.array(X), cn.array(g), cn.array(h)) ) - onnx_pred = pred_onnx_model(model.to_onnx(X.dtype), X, n_outputs) - lb_pred = model.predict(cn.array(X)) - assert onnx_pred.shape == lb_pred.shape - assert np.allclose(onnx_pred, lb_pred, atol=1e-3 if dtype == np.float32 else 1e-6) + compare_onnx_predictions(model, X) @pytest.mark.parametrize("n_outputs", [1, 5]) @@ -48,27 +57,58 @@ def test_init(n_outputs): y = np.full((3, n_outputs), 5.0, dtype=np.float32) estimator = lb.LBRegressor(n_estimators=0, 
random_state=0).fit(X, y) assert np.all(estimator.model_init_ == 5.0) - assert np.all(estimator.predict(X) == 5.0) - assert np.all( - pred_onnx_estimator(estimator.to_onnx(X.dtype), X.__array__(), 1) == 5.0 + compare_onnx_predictions(estimator, X) + + +@pytest.fixture +def regression_dataset(dtype, n_outputs): + from sklearn.datasets import make_regression + + X, y = make_regression( + n_samples=1000, + n_features=10, + n_informative=5, + n_targets=n_outputs, + random_state=0, ) + # make labels strictly positive for certain objectives + return X.astype(dtype), np.abs(y.astype(dtype)) @pytest.mark.parametrize("Model", [M for M in lb.models.BaseModel.__subclasses__()]) -@pytest.mark.parametrize("n_outputs", [1, 5]) +@pytest.mark.parametrize("objective", lb.objectives.REGRESSION_OBJECTIVES) @pytest.mark.parametrize("dtype", [np.float32, np.float64]) -def test_estimator(Model, n_outputs, dtype): - rs = np.random.RandomState(0) - X = rs.random((1000, 10)).astype(dtype) - y = rs.random((1000, n_outputs)).astype(dtype) +@pytest.mark.parametrize("n_outputs", [1, 5]) +def test_regressor(Model, objective, regression_dataset): + X, y = regression_dataset + if objective in [ + "quantile", + "gamma_deviance", + "gamma", + ] and (y.ndim > 1 and y.shape[1] > 1): + pytest.skip("skipping quantile, gamma and gamma_deviance for multiple outputs") model = lb.LBRegressor( - n_estimators=10, + n_estimators=2, + objective=objective, base_models=(Model(),), random_state=0, ).fit(X, y) - assert np.allclose( - model.predict(X), - pred_onnx_estimator(model.to_onnx(X.dtype), X.__array__(), 1).squeeze(), - atol=1e-3, - ) + compare_onnx_predictions(model, X) + + +@pytest.mark.parametrize("dtype", [np.float32, np.float64]) +@pytest.mark.parametrize("n_outputs", [1, 5]) +@pytest.mark.parametrize("max_depth", list(range(0, 12, 3))) +def test_tree(regression_dataset, max_depth): + # test tree depths more exhaustively + # some edge cases e.g. 
max_depth=0 + X, y = regression_dataset + model = lb.LBRegressor( + init=None, + n_estimators=2, + base_models=(lb.models.Tree(max_depth=max_depth),), + random_state=0, + ).fit(X, y) + + compare_onnx_predictions(model, X) diff --git a/legateboost/test/test_with_hypothesis.py b/legateboost/test/test_with_hypothesis.py index 22275f67..c9312079 100644 --- a/legateboost/test/test_with_hypothesis.py +++ b/legateboost/test/test_with_hypothesis.py @@ -1,4 +1,5 @@ import numpy as np +import onnxruntime as ort from hypothesis import HealthCheck, Verbosity, assume, given, settings, strategies as st from sklearn.preprocessing import StandardScaler @@ -25,15 +26,15 @@ @st.composite def tree_strategy(draw): if get_legate_runtime().machine.count(TaskTarget.GPU) > 0: - max_depth = draw(st.integers(1, 8)) + max_depth = draw(st.integers(0, 8)) else: - max_depth = draw(st.integers(1, 6)) - alpha = draw(st.floats(0.0, 1.0)) + max_depth = draw(st.integers(0, 6)) + l2_regularization = draw(st.floats(0.0, 1.0)) split_samples = draw(st.integers(1, 500)) feature_fraction = draw(st.sampled_from([0.5, 1.0])) return lb.models.Tree( max_depth=max_depth, - alpha=alpha, + l2_regularization=l2_regularization, split_samples=split_samples, feature_fraction=feature_fraction, ) @@ -41,20 +42,22 @@ def tree_strategy(draw): @st.composite def nn_strategy(draw): - alpha = draw(st.floats(0.0, 1.0)) + l2_regularization = draw(st.floats(0.0, 1.0)) hidden_layer_sizes = draw(st.sampled_from([(), (100,), (100, 100), (10, 10, 10)])) # max iter needs to be sufficiently large, otherwise the models can make the loss # worse (from a bad initialization) max_iter = 200 return lb.models.NN( - alpha=alpha, hidden_layer_sizes=hidden_layer_sizes, max_iter=max_iter + l2_regularization=l2_regularization, + hidden_layer_sizes=hidden_layer_sizes, + max_iter=max_iter, ) @st.composite def linear_strategy(draw): - alpha = draw(st.floats(0.0, 1.0)) - return lb.models.Linear(alpha=alpha) + l2_regularization = draw(st.floats(0.0, 
1.0)) + return lb.models.Linear(l2_regularization=l2_regularization) @st.composite @@ -63,9 +66,11 @@ def krr_strategy(draw): sigma = draw(st.floats(0.1, 1.0)) else: sigma = None - alpha = draw(st.floats(0.0, 1.0)) + l2_regularization = draw(st.floats(0.0, 1.0)) components = draw(st.integers(2, 10)) - return lb.models.KRR(n_components=components, alpha=alpha, sigma=sigma) + return lb.models.KRR( + n_components=components, l2_regularization=l2_regularization, sigma=sigma + ) @st.composite @@ -161,11 +166,20 @@ def test_regressor(model_params, regression_params, regression_dataset): model = lb.LBRegressor(**model_params, **regression_params, verbose=True).fit( X, y, sample_weight=w, eval_result=eval_result ) - model.predict(X) loss = next(iter(eval_result["train"].values())) assert non_increasing(loss, tol=1e-1) sanity_check_models(model) + # check onnx + # for now reshape legate-boost predict to 2-D + # eventually onnx should match the output shape exactly + predict_raw = model.predict_raw(X) + onnx_predict_raw = pred_onnx(model.to_onnx(X.dtype), X) + onnx_predict_raw = onnx_predict_raw.reshape(predict_raw.shape) + assert np.allclose( + predict_raw, onnx_predict_raw, atol=1e-3 if X.dtype == np.float32 else 1e-6 + ), np.linalg.norm(predict_raw - onnx_predict_raw) + classification_param_strategy = st.fixed_dictionaries( { @@ -240,12 +254,18 @@ def classification_dataset_strategy(draw): return X, y, w, name +def pred_onnx(onnx, X): + sess = ort.InferenceSession(onnx.SerializeToString()) + return sess.run(None, {"X_in": X})[0] + + @given( general_model_param_strategy, classification_param_strategy, classification_dataset_strategy(), ) @cn.errstate(divide="raise", invalid="raise") +@settings(print_blob=True) def test_classifier( model_params: dict, classification_params: dict, classification_dataset: tuple ) -> None: @@ -256,8 +276,15 @@ def test_classifier( ) model.predict(X) model.predict_proba(X) - model.predict_raw(X) + predict_raw = model.predict_raw(X) loss = 
next(iter(eval_result["train"].values())) # multiclass models with higher learning rates don't always converge if len(model.classes_) == 2: assert non_increasing(loss, 1e-1) + + # check onnx + onnx_predict_raw = pred_onnx(model.to_onnx(X.dtype), X) + onnx_predict_raw = onnx_predict_raw.reshape(predict_raw.shape) + assert np.allclose( + predict_raw, onnx_predict_raw, atol=1e-3 if X.dtype == np.float32 else 1e-6 + ), np.linalg.norm(predict_raw - onnx_predict_raw) From 4c9501773858573ee28b88bcde6983ae7ab03314 Mon Sep 17 00:00:00 2001 From: Rory Mitchell Date: Fri, 4 Apr 2025 00:59:46 -0700 Subject: [PATCH 08/21] Use older TreeEnsemble, predictions as double --- conda/environments/all_cuda-122.yaml | 2 +- dependencies.yaml | 2 +- legateboost/legateboost.py | 6 +- legateboost/models/krr.py | 36 +++--- legateboost/models/linear.py | 11 +- legateboost/models/nn.py | 16 ++- legateboost/models/tree.py | 178 ++++++++++++--------------- legateboost/test/test_onnx.py | 20 ++- pyproject.toml | 2 +- 9 files changed, 137 insertions(+), 136 deletions(-) diff --git a/conda/environments/all_cuda-122.yaml b/conda/environments/all_cuda-122.yaml index cf9bd702..6d4e98af 100644 --- a/conda/environments/all_cuda-122.yaml +++ b/conda/environments/all_cuda-122.yaml @@ -30,7 +30,7 @@ dependencies: - numpy - onnx>=1.10 - onnxmltools>=1.10 -- onnxruntime>=1.21 +- onnxruntime - openblas - pydata-sphinx-theme>=0.16 - pytest>=7,<8 diff --git a/dependencies.yaml b/dependencies.yaml index d7501dbd..3700d52e 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -178,4 +178,4 @@ dependencies: - xgboost>=2.0 - onnx>=1.10 - onnxmltools>=1.10 - - onnxruntime>=1.21 + - onnxruntime diff --git a/legateboost/legateboost.py b/legateboost/legateboost.py index 016bb77e..9c40d297 100644 --- a/legateboost/legateboost.py +++ b/legateboost/legateboost.py @@ -562,7 +562,7 @@ def dump_models(self) -> str: def _make_onnx_init(self, X_dtype): # turn self.model_init_ into an ONNX model - from onnx import 
numpy_helper + from onnx import TensorProto, numpy_helper from onnx.checker import check_model from onnx.helper import ( make_graph, @@ -582,11 +582,11 @@ def _make_onnx_init(self, X_dtype): one = numpy_helper.from_array(np.array([1], dtype=np.int64), name="one") nodes.append(make_node("Concat", ["n_rows", "one"], ["tile_repeat"], axis=0)) init = numpy_helper.from_array( - np.atleast_2d(self.model_init_.__array__().astype(X_dtype)), name="init" + np.atleast_2d(self.model_init_.__array__()), name="init" ) prediction_out = make_tensor_value_info( "predictions_out", - np_dtype_to_tensor_dtype(X_dtype), + TensorProto.DOUBLE, [None, self.model_init_.shape[0]], ) nodes.append(make_node("Tile", ["init", "tile_repeat"], ["predictions_out"])) diff --git a/legateboost/models/krr.py b/legateboost/models/krr.py index cbea5a22..bd0b19f2 100644 --- a/legateboost/models/krr.py +++ b/legateboost/models/krr.py @@ -244,7 +244,7 @@ def __mul__(self, scalar: Any) -> "KRR": return new def to_onnx(self, X_dtype) -> Any: - from onnx import numpy_helper + from onnx import TensorProto, numpy_helper from onnx.checker import check_model from onnx.helper import ( make_graph, @@ -257,14 +257,6 @@ def to_onnx(self, X_dtype) -> Any: assert self.X_train.dtype == self.betas_.dtype - def make_constant_node(value: cn.array, name: str) -> Any: - return make_node( - "Constant", - inputs=[], - value=numpy_helper.from_array(value, name=name), - outputs=[name], - ) - nodes = [] # model constants @@ -279,33 +271,34 @@ def make_constant_node(value: cn.array, name: str) -> Any: ) predictions_in = make_tensor_value_info( "predictions_in", - np_dtype_to_tensor_dtype(self.betas_.dtype), + TensorProto.DOUBLE, [None, n_outputs], ) # exanded l2 distance # distance = np.sum(X**2, axis=1)[:, np.newaxis] - 2 * np.dot(X, self.X_train.T) # + np.sum(self.X_train**2, axis=1) - nodes.append(make_constant_node(np.array([1]), "axis1")) + axis1 = numpy_helper.from_array(np.array([1]), name="axis1") 
nodes.append(make_node("ReduceSumSquare", ["X_in", "axis1"], ["XX"])) nodes.append( make_node("Gemm", ["X_in", "X_train"], ["XY"], alpha=-2.0, transB=1) ) nodes.append(make_node("ReduceSumSquare", ["X_train", "axis1"], ["YY"])) - nodes.append(make_constant_node(np.array([1, -1]), "reshape")) + reshape = numpy_helper.from_array( + np.array([1, -1], dtype=np.int64), name="reshape" + ) nodes.append(make_node("Reshape", ["YY", "reshape"], ["YY_reshaped"])) nodes.append(make_node("Add", ["XX", "XY"], ["add0"])) nodes.append(make_node("Add", ["YY_reshaped", "add0"], ["l2"])) - nodes.append(make_constant_node(np.array([0.0], self.betas_.dtype), "zero")) + zero = numpy_helper.from_array(np.array([0.0], self.X_train.dtype), name="zero") nodes.append(make_node("Max", ["l2", "zero"], ["l2_clipped"])) # RBF kernel # K = np.exp(-distance / (2 * self.sigma**2)) if self.sigma is None: raise ValueError("sigma is None. Has fit been called?") - nodes.append( - make_constant_node( - np.array([-2.0 * self.sigma**2], self.betas_.dtype), "denominator" - ) + + denominator = numpy_helper.from_array( + np.array([-2.0 * self.sigma**2], self.X_train.dtype), name="denominator" ) nodes.append(make_node("Div", ["l2_clipped", "denominator"], ["rbf0"])) nodes.append(make_node("Exp", ["rbf0"], ["K"])) @@ -317,14 +310,17 @@ def make_constant_node(value: cn.array, name: str) -> Any: # outputs predictions_out = make_tensor_value_info( "predictions_out", - np_dtype_to_tensor_dtype(self.betas_.dtype), + TensorProto.DOUBLE, [None, n_outputs], ) X_out = make_tensor_value_info( "X_out", np_dtype_to_tensor_dtype(self.betas_.dtype), [None, n_features] ) - nodes.append(make_node("Add", ["dot", "predictions_in"], ["predictions_out"])) + nodes.append(make_node("Cast", ["dot"], ["dot_double"], to=TensorProto.DOUBLE)) + nodes.append( + make_node("Add", ["dot_double", "predictions_in"], ["predictions_out"]) + ) nodes.append(make_node("Identity", ["X_in"], ["X_out"])) graph = make_graph( @@ -332,7 +328,7 @@ def 
make_constant_node(value: cn.array, name: str) -> Any: "legateboost.model.KRR", [X_in, predictions_in], [X_out, predictions_out], - [betas, X_train], + [betas, X_train, axis1, reshape, zero, denominator], ) onnx_model = make_model( graph, diff --git a/legateboost/models/linear.py b/legateboost/models/linear.py index 90e1ee4c..31ee7a28 100644 --- a/legateboost/models/linear.py +++ b/legateboost/models/linear.py @@ -153,7 +153,7 @@ def __mul__(self, scalar: Any) -> "Linear": return new def to_onnx(self, X_dtype) -> Any: - from onnx import numpy_helper + from onnx import TensorProto, numpy_helper from onnx.checker import check_model from onnx.helper import ( make_graph, @@ -178,12 +178,12 @@ def to_onnx(self, X_dtype) -> Any: ) predictions_in = make_tensor_value_info( "predictions_in", - np_dtype_to_tensor_dtype(self.betas_.dtype), + TensorProto.DOUBLE, [None, n_outputs], ) predictions_out = make_tensor_value_info( "predictions_out", - np_dtype_to_tensor_dtype(self.betas_.dtype), + TensorProto.DOUBLE, [None, n_outputs], ) @@ -191,7 +191,10 @@ def to_onnx(self, X_dtype) -> Any: nodes.append(make_node("MatMul", ["X_in", "betas"], ["XBeta"])) nodes.append(make_node("Add", ["XBeta", "intercept"], ["result"])) nodes.append( - make_node("Add", ["result", "predictions_in"], ["predictions_out"]) + make_node("Cast", ["result"], ["result_double"], to=TensorProto.DOUBLE) + ) + nodes.append( + make_node("Add", ["result_double", "predictions_in"], ["predictions_out"]) ) X_out = make_tensor_value_info( "X_out", np_dtype_to_tensor_dtype(self.betas_.dtype), [None, n_features] diff --git a/legateboost/models/nn.py b/legateboost/models/nn.py index d77e4b24..36733c70 100644 --- a/legateboost/models/nn.py +++ b/legateboost/models/nn.py @@ -183,7 +183,7 @@ def __mul__(self, scalar: Any) -> "NN": return new def to_onnx(self, X_dtype) -> Any: - from onnx import numpy_helper + from onnx import TensorProto, numpy_helper from onnx.checker import check_model from onnx.helper import ( 
make_graph, @@ -214,7 +214,7 @@ def to_onnx(self, X_dtype) -> Any: ) predictions_in = make_tensor_value_info( "predictions_in", - np_dtype_to_tensor_dtype(self.coefficients_[0].dtype), + TensorProto.DOUBLE, [None, n_outputs], ) nodes = [] @@ -246,14 +246,22 @@ def to_onnx(self, X_dtype) -> Any: nodes.append(make_node("Identity", ["X_in"], ["X_out"])) predictions_out = make_tensor_value_info( "predictions_out", - np_dtype_to_tensor_dtype(self.coefficients_[0].dtype), + TensorProto.DOUBLE, [None, n_outputs], ) + nodes.append( + make_node( + "Cast", + ["activations{}withbias".format(len(self.coefficients_) - 1)], + ["casted"], + to=TensorProto.DOUBLE, + ) + ) nodes.append( make_node( "Add", [ - "activations{}withbias".format(len(self.coefficients_) - 1), + "casted", "predictions_in", ], ["predictions_out"], diff --git a/legateboost/models/tree.py b/legateboost/models/tree.py index 39e32877..ed612ab1 100644 --- a/legateboost/models/tree.py +++ b/legateboost/models/tree.py @@ -318,144 +318,120 @@ def __mul__(self, scalar: Any) -> "Tree": def to_onnx(self, X_dtype) -> Any: import onnx - from onnx import numpy_helper + from onnx import TensorProto, numpy_helper from onnx.checker import check_model from onnx.helper import ( make_graph, make_model, make_node, - make_tensor, make_tensor_value_info, np_dtype_to_tensor_dtype, ) onnx_nodes = [] - # We map the legate-boost tree representation to the TreeEnsemble ONNX operator - # the features array, splits array, and leaf weights can be passed unchanged - # ONNX then requires some extra arrays to represent the tree structure - # - nodes_truenodeidx is the index of the left child for a given node - # - nodes_falsenodeidx is the index of the right child for a given node - # - nodes_modes indicates that nodes use a <= comparison operator - # - nodes_trueleafs indicates that the left child is a leaf node - # - nodes_falseleafs indicates that the right child is a leaf node - # - leaf_targetids indicates which output the leaf node 
corresponds to - # ONNX does not support vector leaf so we will repeat the tree n_outputs - # times, each time with a different constant for leaf_targetids - # This is not ideal but I don't see a better way - + num_outputs = self.leaf_value.shape[1] tree_max_nodes = self.feature.size all_nodes_idx = np.arange(tree_max_nodes) nodes_featureids = self.feature.__array__() - nodes_splits = numpy_helper.from_array( - self.split_value.__array__().astype(X_dtype) - ) nodes_truenodeids = self.left_child(all_nodes_idx) - # get the left child of each node and check if it is a leaf - # if the node is already leaf then its child can go off the end of the array - # use np.minimum to avoid this - nodes_trueleafs = self.is_leaf( - np.minimum(tree_max_nodes - 1, self.left_child(all_nodes_idx)) - ).astype(int) nodes_falsenodeids = self.right_child(all_nodes_idx) - nodes_falseleafs = self.is_leaf( - np.minimum(tree_max_nodes - 1, self.right_child(all_nodes_idx)) - ).astype(int) - if self.is_leaf(0): - # we have a decision stump - # according to the onnx operator we must set - # true/false at root to the leaf at 0 - nodes_falsenodeids[0] = 0 - nodes_truenodeids[0] = 0 - nodes_trueleafs[0] = 0 - nodes_falseleafs[0] = 0 - num_outputs = self.leaf_value.shape[1] - for output_idx in range(0, num_outputs): - leaf_targetids = np.full(self.feature.size, output_idx, dtype=np.int64) - leaf_weights = numpy_helper.from_array( - self.leaf_value[:, output_idx].__array__().astype(X_dtype) + node_modes = np.full(tree_max_nodes, "BRANCH_LEQ") + node_modes[self.is_leaf(all_nodes_idx)] = "LEAF" + leaf_targetids = np.full(tree_max_nodes, 0, dtype=np.int64) + # predict the leaf node index + # use it to later index into the 2d array of leaf weights + # as ONNX does not support 2d leaf weights + target_weights = all_nodes_idx.astype(np.float32) + kwargs = {} + # TreeEnsembleRegressor asks us to pass these as tensors when X_dtype is double + if X_dtype == np.float32: + kwargs["nodes_values"] = 
self.split_value.__array__() + kwargs["target_weights"] = target_weights + else: + kwargs["nodes_values_as_tensor"] = numpy_helper.from_array( + self.split_value.__array__(), name="nodes_values" ) - - onnx_nodes.append( - make_node( - "TreeEnsemble", - ["X_in"], - ["pred" + str(output_idx)], - domain="ai.onnx.ml", - n_targets=self.leaf_value.shape[1], - membership_values=None, - nodes_missing_value_tracks_true=None, - nodes_hitrates=None, - aggregate_function=1, - post_transform=0, - tree_roots=[0], - nodes_modes=make_tensor( - "nodes_modes", - onnx.TensorProto.UINT8, - self.feature.shape, - np.zeros_like(self.feature, dtype=np.uint8), - ), - nodes_featureids=nodes_featureids, - nodes_splits=nodes_splits, - nodes_truenodeids=nodes_truenodeids, - nodes_trueleafs=nodes_trueleafs, - nodes_falsenodeids=nodes_falsenodeids, - nodes_falseleafs=nodes_falseleafs, - leaf_targetids=leaf_targetids, - leaf_weights=leaf_weights, - ) + kwargs["target_weights_as_tensor"] = numpy_helper.from_array( + target_weights.astype(np.float64), name="target_weights" ) - if output_idx == 0: - onnx_nodes.append( - onnx.helper.make_node( - "Identity", - ["pred" + str(output_idx)], - ["accumulated_pred0"], - ) - ) - else: - onnx_nodes.append( - onnx.helper.make_node( - "Add", - [ - "accumulated_pred" + str(output_idx - 1), - "pred" + str(output_idx), - ], - ["accumulated_pred" + str(output_idx)], - ) - ) + # TreeEnsembleRegressor is deprecated, but its successor TreeEnsemble + # is at the time of writing not available from onnxruntime on conda-forge + # This can be updated at some point without too much trouble + onnx_nodes.append( + make_node( + "TreeEnsembleRegressor", + ["X_in"], + ["predicted_leaf_index"], + domain="ai.onnx.ml", + n_targets=1, + membership_values=None, + nodes_missing_value_tracks_true=None, + nodes_hitrates=None, + nodes_modes=node_modes, + nodes_featureids=nodes_featureids, + nodes_truenodeids=nodes_truenodeids, + nodes_falsenodeids=nodes_falsenodeids, + 
nodes_nodeids=all_nodes_idx, + nodes_treeids=np.zeros(tree_max_nodes, dtype=np.int64), + target_ids=leaf_targetids, + target_nodeids=all_nodes_idx, + target_treeids=np.zeros(tree_max_nodes, dtype=np.int64), + **kwargs, + ) + ) - X_in = make_tensor_value_info( - "X_in", np_dtype_to_tensor_dtype(X_dtype), [None, None] + leaf_weights = numpy_helper.from_array( + self.leaf_value.__array__(), name="leaf_weights" ) - X_out = make_tensor_value_info( - "X_out", np_dtype_to_tensor_dtype(X_dtype), [None, None] + predictions_out = make_tensor_value_info( + "predictions_out", TensorProto.DOUBLE, [None, num_outputs] ) - predictions_in = make_tensor_value_info( - "predictions_in", np_dtype_to_tensor_dtype(X_dtype), [None, num_outputs] + # make indices 1-d + onnx_nodes.append( + make_node( + "Squeeze", ["predicted_leaf_index"], ["predicted_leaf_index_squeezed"] + ) ) - predictions_out = make_tensor_value_info( - "predictions_out", np_dtype_to_tensor_dtype(X_dtype), [None, num_outputs] + onnx_nodes.append( + make_node( + "Cast", + ["predicted_leaf_index_squeezed"], + ["predicted_leaf_index_int"], + to=TensorProto.INT32, + ) ) - onnx_nodes.append(make_node("Identity", ["X_in"], ["X_out"])) onnx_nodes.append( make_node( - "Add", - ["predictions_in", "accumulated_pred" + str(num_outputs - 1)], - ["predictions_out"], + "Gather", ["leaf_weights", "predicted_leaf_index_int"], ["gathered"] ) ) + predictions_in = make_tensor_value_info( + "predictions_in", TensorProto.DOUBLE, [None, num_outputs] + ) + onnx_nodes.append( + make_node("Add", ["predictions_in", "gathered"], ["predictions_out"]) + ) + + X_in = make_tensor_value_info( + "X_in", np_dtype_to_tensor_dtype(X_dtype), [None, None] + ) + X_out = make_tensor_value_info( + "X_out", np_dtype_to_tensor_dtype(X_dtype), [None, None] + ) + onnx_nodes.append(make_node("Identity", ["X_in"], ["X_out"])) graph = make_graph( onnx_nodes, "legateboost.models.Tree", [X_in, predictions_in], [X_out, predictions_out], + [leaf_weights], ) model = 
make_model( graph, opset_imports=[ - onnx.helper.make_opsetid("ai.onnx.ml", 5), + onnx.helper.make_opsetid("ai.onnx.ml", 3), onnx.helper.make_opsetid("", 21), ], ) diff --git a/legateboost/test/test_onnx.py b/legateboost/test/test_onnx.py index 8a33b743..62ad44e4 100644 --- a/legateboost/test/test_onnx.py +++ b/legateboost/test/test_onnx.py @@ -13,13 +13,14 @@ def compare_onnx_predictions(estimator, X): } if isinstance(estimator, lb.models.BaseModel): pred = estimator.predict(cn.array(X)) - feeds["predictions_in"] = np.zeros((X.shape[0], pred.shape[1]), dtype=X.dtype) + feeds["predictions_in"] = np.zeros((X.shape[0], pred.shape[1])) onnx_pred = sess.run(None, feeds)[1] else: pred = estimator.predict_raw(cn.array(X)) onnx_pred = sess.run(None, feeds)[0] onnx_pred = onnx_pred.squeeze() + assert onnx_pred.dtype == np.float64 pred = pred.squeeze() assert pred.shape == onnx_pred.shape assert np.allclose( @@ -112,3 +113,20 @@ def test_tree(regression_dataset, max_depth): ).fit(X, y) compare_onnx_predictions(model, X) + + +@pytest.mark.parametrize("dtype", [np.float32]) +@pytest.mark.parametrize("n_outputs", [1]) +def test_small_tree(regression_dataset, dtype, n_outputs): + max_depth = 0 + # test tree depths more exhaustively + # some edge cases e.g. 
max_depth=0 + X, y = regression_dataset + model = lb.LBRegressor( + init=None, + n_estimators=2, + base_models=(lb.models.Tree(max_depth=max_depth),), + random_state=0, + ).fit(X, y) + + compare_onnx_predictions(model, X) diff --git a/pyproject.toml b/pyproject.toml index f6ab85c1..e8269341 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -51,7 +51,7 @@ test = [ "notebook>=7", "onnx>=1.10", "onnxmltools>=1.10", - "onnxruntime>=1.21", + "onnxruntime", "pytest>=7,<8", "seaborn>=0.13", "xgboost>=2.0", From 438958c94d82632c9325755f68a96e8272f681cf Mon Sep 17 00:00:00 2001 From: Rory Mitchell Date: Fri, 4 Apr 2025 01:41:03 -0700 Subject: [PATCH 09/21] Update docs --- legateboost/legateboost.py | 23 +++++++-- legateboost/models/base_model.py | 8 ++- legateboost/objectives.py | 3 -- legateboost/test/test_onnx.py | 88 ++++++++++++++++++++++---------- 4 files changed, 87 insertions(+), 35 deletions(-) diff --git a/legateboost/legateboost.py b/legateboost/legateboost.py index 9c40d297..49c7077c 100644 --- a/legateboost/legateboost.py +++ b/legateboost/legateboost.py @@ -614,7 +614,11 @@ def _make_onnx_init(self, X_dtype): return onnx_model def to_onnx(self, X_dtype, predict_function="predict"): - """Converts the model to an ONNX model. + """Converts the estimator to an ONNX model which is expected to produce + equivalent predictions to `predict_function` up to reasonable floating + point tolerance. The ONNX model is hard coded to the X input data type, + separate models should be generated for float and double. The ONNX model + takes "X_in" as input and produces "predictions_out" as output. Parameters ---------- @@ -631,6 +635,19 @@ def to_onnx(self, X_dtype, predict_function="predict"): ------- Any The ONNX model. 
+ + Examples + -------- + >>> import numpy as np + >>> import legateboost as lb + >>> X = np.random.random((1000, 10)) + >>> y = np.random.random(X.shape[0]) + >>> model = lb.LBRegressor(n_estimators=5).fit(X, y) + >>> import onnxruntime as ort + >>> sess = ort.InferenceSession(model.to_onnx(X.dtype).SerializeToString()) + >>> onnx_pred = sess.run(None, {"X_in": X})[0] + >>> assert np.allclose(model.predict(X), onnx_pred, atol=1e-6) + >>> """ from onnx.checker import check_model from onnx.compose import merge_models @@ -823,10 +840,10 @@ class LBRegressor(RegressorMixin, LBBase): Examples -------- >>> import cupynumeric as cn - >>> import legateboost as lbst + >>> import legateboost as lb >>> X = cn.random.random((1000, 10)) >>> y = cn.random.random(X.shape[0]) - >>> model = lbst.LBRegressor(n_estimators=5).fit(X, y) + >>> model = lb.LBRegressor(n_estimators=5).fit(X, y) >>> """ diff --git a/legateboost/models/base_model.py b/legateboost/models/base_model.py index a1e88011..07757ab6 100644 --- a/legateboost/models/base_model.py +++ b/legateboost/models/base_model.py @@ -127,9 +127,15 @@ def __mul__(self, scalar: Any) -> "BaseModel": def __hash__(self) -> int: return hash(str(self)) - def to_onnx(self) -> Any: + def to_onnx(self, X_dtype) -> Any: """Convert the model to an ONNX model. + The implemented ONNX model should accept the following two inputs: + - "X_in" : 2D tensor of shape (n_samples, n_features) and type `X_dtype`. + - "predictions in" : 2D tensor of shape (n_samples, n_outputs) and type double. + The model should output: + - "predictions out" : 2D tensor of shape (n_samples, n_outputs) and type double. 
+ Returns ------- Any diff --git a/legateboost/objectives.py b/legateboost/objectives.py index e48b7054..abe4715e 100644 --- a/legateboost/objectives.py +++ b/legateboost/objectives.py @@ -645,7 +645,4 @@ def initialise_prediction( "log_loss", "multi_label", "exp", - "quantile", - "gamma_deviance", - "gamma", ] diff --git a/legateboost/test/test_onnx.py b/legateboost/test/test_onnx.py index 62ad44e4..7b48fad6 100644 --- a/legateboost/test/test_onnx.py +++ b/legateboost/test/test_onnx.py @@ -6,19 +6,14 @@ import legateboost as lb -def compare_onnx_predictions(estimator, X): - sess = ort.InferenceSession(estimator.to_onnx(X.dtype).SerializeToString()) +def compare_model_predictions(model, X): + sess = ort.InferenceSession(model.to_onnx(X.dtype).SerializeToString()) feeds = { "X_in": X, } - if isinstance(estimator, lb.models.BaseModel): - pred = estimator.predict(cn.array(X)) - feeds["predictions_in"] = np.zeros((X.shape[0], pred.shape[1])) - onnx_pred = sess.run(None, feeds)[1] - else: - pred = estimator.predict_raw(cn.array(X)) - onnx_pred = sess.run(None, feeds)[0] - + pred = model.predict(cn.array(X)) + feeds["predictions_in"] = np.zeros((X.shape[0], pred.shape[1])) + onnx_pred = sess.run(None, feeds)[1] onnx_pred = onnx_pred.squeeze() assert onnx_pred.dtype == np.float64 pred = pred.squeeze() @@ -28,6 +23,25 @@ def compare_onnx_predictions(estimator, X): ), np.linalg.norm(pred - onnx_pred) +def compare_estimator_predictions(estimator, X, predict_function): + sess = ort.InferenceSession( + estimator.to_onnx(X.dtype, predict_function).SerializeToString() + ) + feeds = { + "X_in": X, + } + pred = estimator.predict_raw(cn.array(X)) + onnx_pred = sess.run(None, feeds)[0] + + onnx_pred = onnx_pred.squeeze() + assert onnx_pred.dtype == np.float64 + pred = pred.squeeze() + assert pred.shape == onnx_pred.shape + assert np.allclose( + onnx_pred, pred, atol=1e-2 if X.dtype == np.float32 else 1e-6 + ), np.linalg.norm(pred - onnx_pred) + + @pytest.fixture def 
model_dataset(dtype, n_outputs): rs = np.random.RandomState(0) @@ -48,7 +62,7 @@ def test_models(Model, model_dataset): .fit(cn.array(X), cn.array(g), cn.array(h)) ) - compare_onnx_predictions(model, X) + compare_model_predictions(model, X) @pytest.mark.parametrize("n_outputs", [1, 5]) @@ -58,7 +72,7 @@ def test_init(n_outputs): y = np.full((3, n_outputs), 5.0, dtype=np.float32) estimator = lb.LBRegressor(n_estimators=0, random_state=0).fit(X, y) assert np.all(estimator.model_init_ == 5.0) - compare_onnx_predictions(estimator, X) + compare_estimator_predictions(estimator, X, "predict_raw") @pytest.fixture @@ -95,30 +109,48 @@ def test_regressor(Model, objective, regression_dataset): random_state=0, ).fit(X, y) - compare_onnx_predictions(model, X) + compare_estimator_predictions(model, X, "predict_raw") + + +@pytest.fixture +def classification_dataset(dtype, n_outputs): + from sklearn.datasets import make_classification + + X, y = make_classification( + n_samples=1000, + n_features=10, + n_informative=5, + n_classes=n_outputs, + random_state=0, + ) + return X.astype(dtype), np.abs(y.astype(dtype)) +@pytest.mark.parametrize("Model", [M for M in lb.models.BaseModel.__subclasses__()]) +@pytest.mark.parametrize("objective", lb.objectives.CLASSIFICATION_OBJECTIVES) @pytest.mark.parametrize("dtype", [np.float32, np.float64]) -@pytest.mark.parametrize("n_outputs", [1, 5]) -@pytest.mark.parametrize("max_depth", list(range(0, 12, 3))) -def test_tree(regression_dataset, max_depth): - # test tree depths more exhaustively - # some edge cases e.g. 
max_depth=0 - X, y = regression_dataset - model = lb.LBRegressor( - init=None, +@pytest.mark.parametrize("n_outputs", [2, 5]) +def test_classifier(Model, objective, classification_dataset): + X, y = classification_dataset + if objective == "multi_label": + # encode labels as one-hot + encoded = np.zeros((y.shape[0], int(y.max() + 1))) + encoded[np.arange(y.shape[0]), y.astype(int)] = 1 + y = encoded + model = lb.LBClassifier( n_estimators=2, - base_models=(lb.models.Tree(max_depth=max_depth),), + objective=objective, + base_models=(Model(),), random_state=0, ).fit(X, y) - compare_onnx_predictions(model, X) + compare_estimator_predictions(model, X, "predict_raw") -@pytest.mark.parametrize("dtype", [np.float32]) -@pytest.mark.parametrize("n_outputs", [1]) -def test_small_tree(regression_dataset, dtype, n_outputs): - max_depth = 0 +@pytest.mark.parametrize("dtype", [np.float32, np.float64]) +@pytest.mark.parametrize("n_outputs", [1, 5]) +@pytest.mark.parametrize("max_depth", list(range(0, 12, 3))) +def test_tree(regression_dataset, max_depth): # test tree depths more exhaustively # some edge cases e.g. 
max_depth=0 X, y = regression_dataset @@ -129,4 +161,4 @@ def test_small_tree(regression_dataset, dtype, n_outputs): random_state=0, ).fit(X, y) - compare_onnx_predictions(model, X) + compare_estimator_predictions(model, X, "predict_raw") From 1c74de5967dc49961c204960028b80439143ed07 Mon Sep 17 00:00:00 2001 From: Rory Mitchell Date: Thu, 10 Apr 2025 01:26:56 -0700 Subject: [PATCH 10/21] Implement normal onnx operator --- legateboost/legateboost.py | 81 ++++++++++++++++-- legateboost/models/base_model.py | 9 +- legateboost/models/krr.py | 2 +- legateboost/models/linear.py | 2 +- legateboost/models/nn.py | 2 +- legateboost/models/tree.py | 10 +-- legateboost/objectives.py | 138 ++++++++++++++++++++++++++++++- legateboost/test/test_onnx.py | 10 +-- 8 files changed, 230 insertions(+), 24 deletions(-) diff --git a/legateboost/legateboost.py b/legateboost/legateboost.py index 49c7077c..48f6a228 100644 --- a/legateboost/legateboost.py +++ b/legateboost/legateboost.py @@ -613,7 +613,7 @@ def _make_onnx_init(self, X_dtype): return onnx_model - def to_onnx(self, X_dtype, predict_function="predict"): + def to_onnx(self, X: cn.ndarray, predict_function="predict"): """Converts the estimator to an ONNX model which is expected to produce equivalent predictions to `predict_function` up to reasonable floating point tolerance. The ONNX model is hard coded to the X input data type, @@ -622,11 +622,9 @@ def to_onnx(self, X_dtype, predict_function="predict"): Parameters ---------- - X_dtype : numpy.dtype - The expected data type of the input data. ONNX models hard - code the data type of the input data and will crash if this is - not set correctly. - Can be np.float32 or np.float64. + X: + Example input data. Use to infer input data characteristics. + A model produced for float32 will not accept float64 input and vice versa. predict_function : str The serialised ONNX model can produce output equivalent to 'predict', 'predict_proba', or 'predict_raw'. 
@@ -649,14 +647,28 @@ def to_onnx(self, X_dtype, predict_function="predict"): >>> assert np.allclose(model.predict(X), onnx_pred, atol=1e-6) >>> """ + if predict_function not in ["predict", "predict_proba", "predict_raw"]: + raise ValueError( + "predict_function should be one of " + "['predict', 'predict_proba', 'predict_raw']" + ) + + from onnx import TensorProto, numpy_helper from onnx.checker import check_model from onnx.compose import merge_models + from onnx.helper import ( + make_graph, + make_model, + make_node, + make_opsetid, + make_tensor_value_info, + ) - model = self._make_onnx_init(X_dtype) + model = self._make_onnx_init(X.dtype) if self.models_ is not None and len(self.models_) > 0: model = merge_models( model, - self.models_[0].to_onnx(X_dtype), + self.models_[0].to_onnx(X), io_map=[("X_out", "X_in"), ("predictions_out", "predictions_in")], prefix2="model_0_", ) @@ -664,7 +676,7 @@ def to_onnx(self, X_dtype, predict_function="predict"): for i in range(1, len(self.models_)): model = merge_models( model, - self.models_[i].to_onnx(X_dtype), + self.models_[i].to_onnx(X), io_map=[ ("model_{}_X_out".format(i - 1), "X_in"), ("model_{}_predictions_out".format(i - 1), "predictions_in"), @@ -676,6 +688,57 @@ def to_onnx(self, X_dtype, predict_function="predict"): # add a transform operator model.graph.output.remove(model.graph.output[0]) + # add any transform from the objective + if predict_function == "predict": + model = merge_models( + model, + self._objective_instance.onnx_transform(), + io_map=[ + ( + "model_{}_predictions_out".format(len(self.models_) - 1), + "predictions_in", + ) + ], + prefix2="transform_", + ) + # coerce the output shape to be the same as the equivalent predict function + test_pred = getattr(self, predict_function)(X[0:1]) + + extra_out_shape = [] if test_pred.ndim == 1 else list(test_pred.shape[1:]) + shape = numpy_helper.from_array( + np.array([-1] + extra_out_shape), name="shape" + ) + + reshape_predictions_in = 
make_tensor_value_info( + "reshape_predictions_in", + TensorProto.DOUBLE, + [None, None], + ) + reshaped_predictions = make_tensor_value_info( + "reshaped_predictions", + TensorProto.DOUBLE, + shape=[None] + list(extra_out_shape), + ) + nodes = [ + make_node( + "Reshape", + ["reshape_predictions_in", "shape"], + ["reshaped_predictions"], + ) + ] + graph = make_graph( + nodes, + "legateboost estimator transform", + [reshape_predictions_in], + [reshaped_predictions], + [shape], + ) + model = merge_models( + model, + make_model(graph, opset_imports=[make_opsetid("", 21)]), + io_map=[("transform_predictions_out", "reshape_predictions_in")], + ) + check_model(model) return model diff --git a/legateboost/models/base_model.py b/legateboost/models/base_model.py index 07757ab6..45fac68d 100644 --- a/legateboost/models/base_model.py +++ b/legateboost/models/base_model.py @@ -127,7 +127,7 @@ def __mul__(self, scalar: Any) -> "BaseModel": def __hash__(self) -> int: return hash(str(self)) - def to_onnx(self, X_dtype) -> Any: + def to_onnx(self, X) -> Any: """Convert the model to an ONNX model. The implemented ONNX model should accept the following two inputs: @@ -136,6 +136,13 @@ def to_onnx(self, X_dtype) -> Any: The model should output: - "predictions out" : 2D tensor of shape (n_samples, n_outputs) and type double. + Parameters + ---------- + X : array-like of shape (n_samples, n_features) + Example input X matrix. Used to infer type and shape of the input. + + y_pred : ndarray of shape (n_samples,) + The predicted labels. 
Returns ------- Any diff --git a/legateboost/models/krr.py b/legateboost/models/krr.py index bd0b19f2..da2e22ce 100644 --- a/legateboost/models/krr.py +++ b/legateboost/models/krr.py @@ -243,7 +243,7 @@ def __mul__(self, scalar: Any) -> "KRR": self.betas_ *= scalar return new - def to_onnx(self, X_dtype) -> Any: + def to_onnx(self, X) -> Any: from onnx import TensorProto, numpy_helper from onnx.checker import check_model from onnx.helper import ( diff --git a/legateboost/models/linear.py b/legateboost/models/linear.py index 31ee7a28..de88ef2c 100644 --- a/legateboost/models/linear.py +++ b/legateboost/models/linear.py @@ -152,7 +152,7 @@ def __mul__(self, scalar: Any) -> "Linear": new.betas_ *= scalar return new - def to_onnx(self, X_dtype) -> Any: + def to_onnx(self, X) -> Any: from onnx import TensorProto, numpy_helper from onnx.checker import check_model from onnx.helper import ( diff --git a/legateboost/models/nn.py b/legateboost/models/nn.py index 36733c70..d628b6e5 100644 --- a/legateboost/models/nn.py +++ b/legateboost/models/nn.py @@ -182,7 +182,7 @@ def __mul__(self, scalar: Any) -> "NN": new.biases_[-1] *= scalar return new - def to_onnx(self, X_dtype) -> Any: + def to_onnx(self, X) -> Any: from onnx import TensorProto, numpy_helper from onnx.checker import check_model from onnx.helper import ( diff --git a/legateboost/models/tree.py b/legateboost/models/tree.py index ed612ab1..c6711172 100644 --- a/legateboost/models/tree.py +++ b/legateboost/models/tree.py @@ -316,7 +316,7 @@ def __mul__(self, scalar: Any) -> "Tree": new.leaf_value *= scalar return new - def to_onnx(self, X_dtype) -> Any: + def to_onnx(self, X) -> Any: import onnx from onnx import TensorProto, numpy_helper from onnx.checker import check_model @@ -344,8 +344,8 @@ def to_onnx(self, X_dtype) -> Any: # as ONNX does not support 2d leaf weights target_weights = all_nodes_idx.astype(np.float32) kwargs = {} - # TreeEnsembleRegressor asks us to pass these as tensors when X_dtype is double - if 
X_dtype == np.float32: + # TreeEnsembleRegressor asks us to pass these as tensors when X.dtype is double + if X.dtype == np.float32: kwargs["nodes_values"] = self.split_value.__array__() kwargs["target_weights"] = target_weights else: @@ -415,10 +415,10 @@ def to_onnx(self, X_dtype) -> Any: ) X_in = make_tensor_value_info( - "X_in", np_dtype_to_tensor_dtype(X_dtype), [None, None] + "X_in", np_dtype_to_tensor_dtype(X.dtype), [None, None] ) X_out = make_tensor_value_info( - "X_out", np_dtype_to_tensor_dtype(X_dtype), [None, None] + "X_out", np_dtype_to_tensor_dtype(X.dtype), [None, None] ) onnx_nodes.append(make_node("Identity", ["X_in"], ["X_out"])) graph = make_graph( diff --git a/legateboost/objectives.py b/legateboost/objectives.py index abe4715e..755f31d0 100644 --- a/legateboost/objectives.py +++ b/legateboost/objectives.py @@ -1,6 +1,7 @@ from abc import ABC, abstractmethod from typing import Tuple +import numpy as np from scipy.stats import norm from typing_extensions import TypeAlias, override @@ -70,6 +71,54 @@ def transform(self, pred: cn.ndarray) -> cn.ndarray: """ return pred + def onnx_transform(self) -> cn.ndarray: + """Returns an ONNX model that accepts + - "predictions_in" : 2D tensor of shape (n_samples, n_outputs) and type double. + And outputs the transformed predictions. + - "predictions_out" : arbitrary tensor depending on the objective. + + Is by default the identity transform. + + The ONNX transform should produce the same output as the transform + method for each objective. + + Returns: + Onnx model that transforms the predictions. 
+ """ + from onnx import TensorProto + from onnx.checker import check_model + from onnx.helper import ( + make_graph, + make_model, + make_node, + make_opsetid, + make_tensor_value_info, + ) + + predictions_in = make_tensor_value_info( + "predictions_in", + TensorProto.DOUBLE, + [None, None], + ) + predictions_out = make_tensor_value_info( + "predictions_out", + TensorProto.DOUBLE, + [None, None], + ) + nodes = [make_node("Identity", ["predictions_in"], ["predictions_out"])] + graph = make_graph( + nodes, + "BaseModel", + [predictions_in], + [predictions_out], + ) + onnx_model = make_model( + graph, + opset_imports=[make_opsetid("", 21)], + ) + check_model(onnx_model) + return onnx_model + @abstractmethod def metric(self) -> BaseMetric: """Returns the default error metric for the objective function. @@ -243,6 +292,93 @@ def transform(self, pred: cn.ndarray) -> cn.ndarray: pred[:, :, 1] = cn.clip(pred[:, :, 1], -5, 5) return pred + def onnx_transform(self) -> cn.ndarray: + from onnx import TensorProto, numpy_helper + from onnx.checker import check_model + from onnx.helper import ( + make_graph, + make_model, + make_node, + make_opsetid, + make_tensor_value_info, + ) + + predictions_in = make_tensor_value_info( + "predictions_in", + TensorProto.DOUBLE, + [None, None], + ) + predictions_out = make_tensor_value_info( + "predictions_out", + TensorProto.DOUBLE, + [None, None, 2], + ) + nodes = [] + # clip + mininmum = numpy_helper.from_array( + np.array(-5, dtype=np.float64), name="minimum" + ) + maximum = numpy_helper.from_array(np.array(5, dtype=np.float64), name="maximum") + # reshape + out_shape = numpy_helper.from_array( + np.array([0, -1, 2], dtype=np.int64), name="out_shape" + ) + nodes.append( + make_node("Reshape", ["predictions_in", "out_shape"], ["reshaped"]) + ) + + nodes.append(make_node("Shape", ["reshaped"], ["new_shape"])) + + var_starts = numpy_helper.from_array( + np.array([0, 0, 1], dtype=np.int64), name="var_starts" + ) + mean_starts = 
numpy_helper.from_array( + np.array([0, 0, 0], dtype=np.int64), name="mean_starts" + ) + + # extract mean and variance parts + axis = numpy_helper.from_array(np.array([0, 1, 2], dtype=np.int64), name="axis") + steps = numpy_helper.from_array( + np.array([1, 1, 2], dtype=np.int64), name="steps" + ) + nodes.append( + make_node( + "Slice", + ["reshaped", "var_starts", "new_shape", "axis", "steps"], + ["variance"], + ) + ) + nodes.append( + make_node( + "Slice", + ["reshaped", "mean_starts", "new_shape", "axis", "steps"], + ["mean"], + ) + ) + nodes.append( + make_node("Clip", ["variance", "minimum", "maximum"], ["clipped_variance"]) + ) + + # combine them again + nodes.append( + make_node( + "Concat", ["mean", "clipped_variance"], ["predictions_out"], axis=2 + ) + ) + graph = make_graph( + nodes, + "NormalObjective", + [predictions_in], + [predictions_out], + [out_shape, var_starts, mean_starts, axis, steps, mininmum, maximum], + ) + onnx_model = make_model( + graph, + opset_imports=[make_opsetid("", 21)], + ) + check_model(onnx_model) + return onnx_model + @override def mean(self, param: cn.ndarray) -> cn.ndarray: """Return the mean for the Normal distribution.""" @@ -421,7 +557,7 @@ def var(self, param: cn.ndarray) -> cn.ndarray: class QuantileObjective(BaseObjective): """Minimises the quantile loss, otherwise known as check loss or pinball loss. 
- :math:`L(y_i, p_i) = \\frac{1}{k}\\sum_{j=1}^{k} (q_j - \\mathbb{1})(y_i - p_{i, j})` + :math:`L(y_i, p_i) = \\frac{1}{k}\\sum_{j=1}^{k} (q_j - \\mathbb{1})(y_i - p_{i, j})` where diff --git a/legateboost/test/test_onnx.py b/legateboost/test/test_onnx.py index 7b48fad6..fc686b1f 100644 --- a/legateboost/test/test_onnx.py +++ b/legateboost/test/test_onnx.py @@ -7,7 +7,7 @@ def compare_model_predictions(model, X): - sess = ort.InferenceSession(model.to_onnx(X.dtype).SerializeToString()) + sess = ort.InferenceSession(model.to_onnx(X).SerializeToString()) feeds = { "X_in": X, } @@ -25,17 +25,16 @@ def compare_model_predictions(model, X): def compare_estimator_predictions(estimator, X, predict_function): sess = ort.InferenceSession( - estimator.to_onnx(X.dtype, predict_function).SerializeToString() + estimator.to_onnx(X, predict_function).SerializeToString() ) feeds = { "X_in": X, } - pred = estimator.predict_raw(cn.array(X)) + pred_method = getattr(estimator, predict_function) + pred = pred_method(cn.array(X)) onnx_pred = sess.run(None, feeds)[0] - onnx_pred = onnx_pred.squeeze() assert onnx_pred.dtype == np.float64 - pred = pred.squeeze() assert pred.shape == onnx_pred.shape assert np.allclose( onnx_pred, pred, atol=1e-2 if X.dtype == np.float32 else 1e-6 @@ -110,6 +109,7 @@ def test_regressor(Model, objective, regression_dataset): ).fit(X, y) compare_estimator_predictions(model, X, "predict_raw") + compare_estimator_predictions(model, X, "predict") @pytest.fixture From 692a1a5200e4b608c9584fffecf8446f03e5b8f4 Mon Sep 17 00:00:00 2001 From: Rory Mitchell Date: Thu, 10 Apr 2025 02:35:18 -0700 Subject: [PATCH 11/21] Implement remaining transforms --- legateboost/legateboost.py | 2 +- legateboost/objectives.py | 218 ++++++++++++++++++++++++++++- legateboost/test/test_objective.py | 28 ++++ 3 files changed, 245 insertions(+), 3 deletions(-) diff --git a/legateboost/legateboost.py b/legateboost/legateboost.py index 48f6a228..62dc48f7 100644 --- 
a/legateboost/legateboost.py +++ b/legateboost/legateboost.py @@ -692,7 +692,7 @@ def to_onnx(self, X: cn.ndarray, predict_function="predict"): if predict_function == "predict": model = merge_models( model, - self._objective_instance.onnx_transform(), + self._objective_instance.onnx_transform(self.predict_raw(X[0:1])), io_map=[ ( "model_{}_predictions_out".format(len(self.models_) - 1), diff --git a/legateboost/objectives.py b/legateboost/objectives.py index 755f31d0..5c59330d 100644 --- a/legateboost/objectives.py +++ b/legateboost/objectives.py @@ -44,6 +44,7 @@ class BaseObjective(ABC): # utility constant one = cn.ones(1, dtype=cn.float64) + half = cn.array(0.5, dtype=cn.float64) @abstractmethod def gradient(self, y: cn.ndarray, pred: cn.ndarray) -> GradPair: @@ -71,7 +72,7 @@ def transform(self, pred: cn.ndarray) -> cn.ndarray: """ return pred - def onnx_transform(self) -> cn.ndarray: + def onnx_transform(self, pred: cn.ndarray) -> cn.ndarray: """Returns an ONNX model that accepts - "predictions_in" : 2D tensor of shape (n_samples, n_outputs) and type double. And outputs the transformed predictions. 
@@ -292,7 +293,7 @@ def transform(self, pred: cn.ndarray) -> cn.ndarray: pred[:, :, 1] = cn.clip(pred[:, :, 1], -5, 5) return pred - def onnx_transform(self) -> cn.ndarray: + def onnx_transform(self, pred: cn.ndarray) -> cn.ndarray: from onnx import TensorProto, numpy_helper from onnx.checker import check_model from onnx.helper import ( @@ -457,6 +458,45 @@ def transform(self, pred: cn.ndarray) -> cn.ndarray: """Inverse log link.""" return cn.exp(pred) + @override + def onnx_transform(self, pred: cn.ndarray) -> cn.ndarray: + from onnx import TensorProto + from onnx.checker import check_model + from onnx.helper import ( + make_graph, + make_model, + make_node, + make_opsetid, + make_tensor_value_info, + ) + + predictions_in = make_tensor_value_info( + "predictions_in", + TensorProto.DOUBLE, + [None, None], + ) + predictions_out = make_tensor_value_info( + "predictions_out", + TensorProto.DOUBLE, + [None, None], + ) + nodes = [] + # exp + nodes.append(make_node("Exp", ["predictions_in"], ["predictions_out"])) + + graph = make_graph( + nodes, + "GammaDevianceObjective", + [predictions_in], + [predictions_out], + ) + onnx_model = make_model( + graph, + opset_imports=[make_opsetid("", 21)], + ) + check_model(onnx_model) + return onnx_model + def initialise_prediction( self, y: cn.ndarray, w: cn.ndarray, boost_from_average: bool ) -> cn.ndarray: @@ -501,6 +541,53 @@ def transform(self, pred: cn.ndarray) -> cn.ndarray: assert pred.ndim == 3 return cn.exp(pred) + @override + def onnx_transform(self, pred: cn.ndarray) -> cn.ndarray: + from onnx import TensorProto, numpy_helper + from onnx.checker import check_model + from onnx.helper import ( + make_graph, + make_model, + make_node, + make_opsetid, + make_tensor_value_info, + ) + + predictions_in = make_tensor_value_info( + "predictions_in", + TensorProto.DOUBLE, + [None, None], + ) + predictions_out = make_tensor_value_info( + "predictions_out", + TensorProto.DOUBLE, + [None, None, 2], + ) + nodes = [] + # reshape + 
out_shape = numpy_helper.from_array( + np.array([0, -1, 2], dtype=np.int64), name="out_shape" + ) + nodes.append( + make_node("Reshape", ["predictions_in", "out_shape"], ["reshaped"]) + ) + # exp + nodes.append(make_node("Exp", ["reshaped"], ["predictions_out"])) + + graph = make_graph( + nodes, + "GammaObjective", + [predictions_in], + [predictions_out], + [out_shape], + ) + onnx_model = make_model( + graph, + opset_imports=[make_opsetid("", 21)], + ) + check_model(onnx_model) + return onnx_model + @override def metric(self) -> GammaLLMetric: return GammaLLMetric() @@ -647,6 +734,46 @@ def transform(self, pred: cn.ndarray) -> cn.ndarray: div = cn.sum(e_x, axis=1) return e_x / div[:, cn.newaxis] + def onnx_transform(self, pred: cn.ndarray) -> cn.ndarray: + from onnx import TensorProto + from onnx.checker import check_model + from onnx.helper import ( + make_graph, + make_model, + make_node, + make_opsetid, + make_tensor_value_info, + ) + + predictions_in = make_tensor_value_info( + "predictions_in", + TensorProto.DOUBLE, + [None, None], + ) + predictions_out = make_tensor_value_info( + "predictions_out", + TensorProto.DOUBLE, + [None, None], + ) + nodes = [] + if pred.shape[1] == 1: + nodes.append(make_node("Sigmoid", ["predictions_in"], ["predictions_out"])) + else: + nodes.append(make_node("Softmax", ["predictions_in"], ["predictions_out"])) + graph = make_graph( + nodes, + "LogLossObjective", + [predictions_in], + [predictions_out], + [], + ) + onnx_model = make_model( + graph, + opset_imports=[make_opsetid("", 21)], + ) + check_model(onnx_model) + return onnx_model + def metric(self) -> LogLossMetric: return LogLossMetric() @@ -683,6 +810,43 @@ def gradient(self, y: cn.ndarray, pred: cn.ndarray) -> GradPair: def transform(self, pred: cn.ndarray) -> cn.ndarray: return self.one / (self.one + cn.exp(-pred)) + def onnx_transform(self, pred: cn.ndarray) -> cn.ndarray: + from onnx import TensorProto + from onnx.checker import check_model + from onnx.helper import ( + 
make_graph, + make_model, + make_node, + make_opsetid, + make_tensor_value_info, + ) + + predictions_in = make_tensor_value_info( + "predictions_in", + TensorProto.DOUBLE, + [None, None], + ) + predictions_out = make_tensor_value_info( + "predictions_out", + TensorProto.DOUBLE, + [None, None], + ) + nodes = [] + nodes.append(make_node("Sigmoid", ["predictions_in"], ["predictions_out"])) + graph = make_graph( + nodes, + "MultiLabelObjective", + [predictions_in], + [predictions_out], + [], + ) + onnx_model = make_model( + graph, + opset_imports=[make_opsetid("", 21)], + ) + check_model(onnx_model) + return onnx_model + def output_class(self, pred: cn.ndarray) -> cn.ndarray: return cn.array(pred > 0.5, dtype=cn.int32).squeeze() @@ -750,6 +914,56 @@ def transform(self, pred: cn.ndarray) -> cn.ndarray: K = pred.shape[1] # number of classes return logloss.transform((1 / (K - 1)) * pred) + def onnx_transform(self, pred: cn.ndarray) -> cn.ndarray: + from onnx import TensorProto, numpy_helper + from onnx.checker import check_model + from onnx.helper import ( + make_graph, + make_model, + make_node, + make_opsetid, + make_tensor_value_info, + ) + + predictions_in = make_tensor_value_info( + "predictions_in", + TensorProto.DOUBLE, + [None, None], + ) + predictions_out = make_tensor_value_info( + "predictions_out", + TensorProto.DOUBLE, + [None, None], + ) + + nodes = [] + initializers = [] + if pred.shape[1] == 1: + two = numpy_helper.from_array(np.array(2, dtype=np.float64), name="two") + nodes.append(make_node("Mul", ["predictions_in", "two"], ["multiplied"])) + nodes.append(make_node("Sigmoid", ["multiplied"], ["predictions_out"])) + initializers.append(two) + else: + constant = numpy_helper.from_array( + np.array(1 / (pred.shape[1] - 1), dtype=np.float64), name="constant" + ) + nodes.append(make_node("Mul", ["predictions_in", "constant"], ["scaled"])) + nodes.append(make_node("Softmax", ["scaled"], ["predictions_out"])) + initializers.append(constant) + graph = 
make_graph( + nodes, + "ExpObjective", + [predictions_in], + [predictions_out], + initializers, + ) + onnx_model = make_model( + graph, + opset_imports=[make_opsetid("", 21)], + ) + check_model(onnx_model) + return onnx_model + def metric(self) -> ExponentialMetric: return ExponentialMetric() diff --git a/legateboost/test/test_objective.py b/legateboost/test/test_objective.py index aaf8823c..a78ecb80 100644 --- a/legateboost/test/test_objective.py +++ b/legateboost/test/test_objective.py @@ -1,3 +1,5 @@ +import numpy as np +import onnxruntime as ort import pytest import cupynumeric as cn @@ -5,12 +7,26 @@ from legateboost.testing.utils import non_increasing +def compare_onnx_transform(obj, pred): + sess = ort.InferenceSession(obj.onnx_transform(pred).SerializeToString()) + feeds = { + "predictions_in": pred, + } + onnx_transform = sess.run(None, feeds)[0] + assert onnx_transform.dtype == np.float64 + transform = obj.transform(cn.array(pred)) + assert transform.shape == onnx_transform.shape + assert np.allclose(onnx_transform, transform, atol=1e-6) + + def test_normal() -> None: obj = lb.NormalObjective() y = cn.array([[1.0], [2.0], [3.0]]) init = obj.initialise_prediction(y, cn.array([1.0, 1.0, 1.0]), True) assert cn.allclose(init, cn.array([y.mean(), cn.log(y.std())])) + compare_onnx_transform(obj, np.arange(12).reshape(2, 6).astype(np.float64)) + def test_gamma_deviance() -> None: obj = lb.GammaDevianceObjective() @@ -35,6 +51,8 @@ def test_gamma_deviance() -> None: reg.fit(X, y1, eval_set=[(X, y1)], eval_result=eval_result) assert non_increasing(eval_result["train"]["deviance_gamma"]) + compare_onnx_transform(obj, np.arange(12).reshape(2, 6).astype(np.float64)) + def test_gamma() -> None: import numpy as np @@ -51,6 +69,8 @@ def test_gamma() -> None: reg.fit(X, y, eval_set=[(X, y)], eval_result=eval_result) assert non_increasing(eval_result["train"]["gamma_neg_ll"]) + compare_onnx_transform(obj, np.arange(12).reshape(2, 6).astype(np.float64)) + def 
test_log_loss() -> None: obj = lb.LogLossObjective() @@ -95,6 +115,9 @@ def test_log_loss() -> None: False, ) + compare_onnx_transform(obj, np.arange(12).reshape(2, 6).astype(np.float64)) + compare_onnx_transform(obj, np.arange(4).reshape(4, 1).astype(np.float64)) + def test_exp(): obj = lb.ExponentialObjective() @@ -127,6 +150,9 @@ def test_exp(): False, ) + compare_onnx_transform(obj, np.arange(12).reshape(2, 6).astype(np.float64)) + compare_onnx_transform(obj, np.arange(4).reshape(4, 1).astype(np.float64)) + def test_multi_label(): obj = lb.MultiLabelObjective() @@ -139,3 +165,5 @@ def test_multi_label(): with pytest.raises(ValueError, match=r"Expected labels to be in \[0, 1\]"): obj.initialise_prediction(cn.array([[1], [2]]), cn.array([[1.0], [1.0]]), False) + + compare_onnx_transform(obj, np.arange(12).reshape(2, 6).astype(np.float64)) From 2d32592ebce89c18280053e6de1daa2714884094 Mon Sep 17 00:00:00 2001 From: Rory Mitchell Date: Fri, 11 Apr 2025 02:24:45 -0700 Subject: [PATCH 12/21] Classifier tests passing for float64 but not float32 --- legateboost/legateboost.py | 349 ++++++++++++++++++++++++---------- legateboost/objectives.py | 103 +++++++++- legateboost/test/test_onnx.py | 4 +- 3 files changed, 357 insertions(+), 99 deletions(-) diff --git a/legateboost/legateboost.py b/legateboost/legateboost.py index 62dc48f7..e6dd45e6 100644 --- a/legateboost/legateboost.py +++ b/legateboost/legateboost.py @@ -560,6 +560,50 @@ def dump_models(self) -> str: text += str(m) return text + def _make_onnx_reshape_predictions(self, pred: cn.ndarray) -> cn.ndarray: + from onnx import TensorProto, numpy_helper + from onnx.checker import check_model + from onnx.helper import ( + make_graph, + make_model, + make_node, + make_opsetid, + make_tensor_value_info, + np_dtype_to_tensor_dtype, + ) + + # make an onnx model that shapes the predictions equivalently to pred + extra_out_shape = [] if pred.ndim == 1 else list(pred.shape[1:]) + shape = numpy_helper.from_array(np.array([-1] 
+ extra_out_shape), name="shape") + + predictions_in = make_tensor_value_info( + "predictions_in", + TensorProto.DOUBLE, + [None, None], + ) + predictions_out = make_tensor_value_info( + "predictions_out", + np_dtype_to_tensor_dtype(pred.dtype), + shape=[None] + list(extra_out_shape), + ) + nodes = [ + make_node( + "Reshape", + ["predictions_in", "shape"], + ["predictions_out"], + ) + ] + graph = make_graph( + nodes, + "reshape output", + [predictions_in], + [predictions_out], + [shape], + ) + model = make_model(graph, opset_imports=[make_opsetid("", 21)]) + check_model(model) + return model + def _make_onnx_init(self, X_dtype): # turn self.model_init_ into an ONNX model from onnx import TensorProto, numpy_helper @@ -613,56 +657,9 @@ def _make_onnx_init(self, X_dtype): return onnx_model - def to_onnx(self, X: cn.ndarray, predict_function="predict"): - """Converts the estimator to an ONNX model which is expected to produce - equivalent predictions to `predict_function` up to reasonable floating - point tolerance. The ONNX model is hard coded to the X input data type, - separate models should be generated for float and double. The ONNX model - takes "X_in" as input and produces "predictions_out" as output. - - Parameters - ---------- - X: - Example input data. Use to infer input data characteristics. - A model produced for float32 will not accept float64 input and vice versa. - predict_function : str - The serialised ONNX model can produce output equivalent to 'predict', - 'predict_proba', or 'predict_raw'. - The default is "predict". - Returns - ------- - Any - The ONNX model. 
- - Examples - -------- - >>> import numpy as np - >>> import legateboost as lb - >>> X = np.random.random((1000, 10)) - >>> y = np.random.random(X.shape[0]) - >>> model = lb.LBRegressor(n_estimators=5).fit(X, y) - >>> import onnxruntime as ort - >>> sess = ort.InferenceSession(model.to_onnx(X.dtype).SerializeToString()) - >>> onnx_pred = sess.run(None, {"X_in": X})[0] - >>> assert np.allclose(model.predict(X), onnx_pred, atol=1e-6) - >>> - """ - if predict_function not in ["predict", "predict_proba", "predict_raw"]: - raise ValueError( - "predict_function should be one of " - "['predict', 'predict_proba', 'predict_raw']" - ) - - from onnx import TensorProto, numpy_helper + def _to_onnx_predict_raw(self, X: cn.ndarray): from onnx.checker import check_model from onnx.compose import merge_models - from onnx.helper import ( - make_graph, - make_model, - make_node, - make_opsetid, - make_tensor_value_info, - ) model = self._make_onnx_init(X.dtype) if self.models_ is not None and len(self.models_) > 0: @@ -685,60 +682,26 @@ def to_onnx(self, X: cn.ndarray, predict_function="predict"): ) # remove the X_out output, we only need the predictions - # add a transform operator model.graph.output.remove(model.graph.output[0]) - # add any transform from the objective - if predict_function == "predict": - model = merge_models( - model, - self._objective_instance.onnx_transform(self.predict_raw(X[0:1])), - io_map=[ - ( - "model_{}_predictions_out".format(len(self.models_) - 1), - "predictions_in", - ) - ], - prefix2="transform_", - ) - # coerce the output shape to be the same as the equivalent predict function - test_pred = getattr(self, predict_function)(X[0:1]) + check_model(model) + return model - extra_out_shape = [] if test_pred.ndim == 1 else list(test_pred.shape[1:]) - shape = numpy_helper.from_array( - np.array([-1] + extra_out_shape), name="shape" - ) + def _to_onnx_predict_transformed(self, X: cn.ndarray): + from onnx.checker import check_model + from onnx.compose import 
merge_models - reshape_predictions_in = make_tensor_value_info( - "reshape_predictions_in", - TensorProto.DOUBLE, - [None, None], - ) - reshaped_predictions = make_tensor_value_info( - "reshaped_predictions", - TensorProto.DOUBLE, - shape=[None] + list(extra_out_shape), - ) - nodes = [ - make_node( - "Reshape", - ["reshape_predictions_in", "shape"], - ["reshaped_predictions"], + model = merge_models( + self._to_onnx_predict_raw(X), + self._objective_instance.onnx_transform(self.predict_raw(X[0:1])), + io_map=[ + ( + "model_{}_predictions_out".format(len(self.models_) - 1), + "predictions_in", ) - ] - graph = make_graph( - nodes, - "legateboost estimator transform", - [reshape_predictions_in], - [reshaped_predictions], - [shape], - ) - model = merge_models( - model, - make_model(graph, opset_imports=[make_opsetid("", 21)]), - io_map=[("transform_predictions_out", "reshape_predictions_in")], - ) - + ], + prefix2="transform_", + ) check_model(model) return model @@ -1029,6 +992,63 @@ def predict(self, X: cn.ndarray) -> cn.ndarray: pred = pred.squeeze(axis=1) return pred + def to_onnx(self, X: cn.ndarray, predict_function: str = "predict"): + """Converts the estimator to an ONNX model which is expected to produce + equivalent predictions to `predict_function` up to reasonable floating + point tolerance. The ONNX model is hard coded to the X input data type, + separate models should be generated for float and double. The ONNX model + takes "X_in" as input and produces "predictions_out" as output. + + Parameters + ---------- + X: + Example input data. Use to infer input data characteristics. + A model produced for float32 will not accept float64 input and vice versa. + predict_function : str + The serialised ONNX model can produce output equivalent to 'predict' or + 'predict_raw'. + The default is "predict". + Returns + ------- + Any + The ONNX model. 
+ + Examples + -------- + >>> import numpy as np + >>> import legateboost as lb + >>> X = np.random.random((1000, 10)) + >>> y = np.random.random(X.shape[0]) + >>> model = lb.LBRegressor(n_estimators=5).fit(X, y) + >>> import onnxruntime as ort + >>> sess = ort.InferenceSession(model.to_onnx(X).SerializeToString()) + >>> onnx_pred = sess.run(None, {"X_in": X})[0] + >>> assert np.allclose(model.predict(X), onnx_pred, atol=1e-6) + >>> + """ + from onnx.checker import check_model + from onnx.compose import merge_models + + if predict_function not in ["predict", "predict_raw"]: + raise ValueError( + "predict_function should be one of ['predict', 'predict_raw']" + ) + if predict_function == "predict": + model = self._to_onnx_predict_transformed(X) + else: + model = self._to_onnx_predict_raw(X) + + # coerce the output shape to be the same as the equivalent predict function + test_pred = getattr(self, predict_function)(X[0:1]) + model = merge_models( + model, + self._make_onnx_reshape_predictions(test_pred), + io_map=[(model.graph.output[0].name, "predictions_in")], + prefix2="reshape_", + ) + check_model(model) + return model + class LBClassifier(ClassifierMixin, LBBase): """Implements a gradient boosting algorithm for classification problems. 
@@ -1274,3 +1294,138 @@ def predict(self, X: cn.ndarray) -> cn.ndarray: """ check_is_fitted(self) return self._objective_instance.output_class(self.predict_proba(X)) + + def _mirror_predict_proba_output(self, model) -> cn.ndarray: + assert len(self.classes_) == 2 + from onnx import TensorProto, numpy_helper + from onnx.checker import check_model + from onnx.compose import merge_models + from onnx.helper import ( + make_graph, + make_model, + make_node, + make_opsetid, + make_tensor_value_info, + ) + + nodes = [] + predictions_in = make_tensor_value_info( + "predictions_in", + TensorProto.DOUBLE, + [None, None], + ) + predictions_out = make_tensor_value_info( + "predictions_out", + TensorProto.DOUBLE, + [None, 2], + ) + one = numpy_helper.from_array(np.array([1.0], dtype=np.float64), name="one") + nodes.append(make_node("Sub", ["one", "predictions_in"], ["false_probability"])) + nodes.append( + make_node( + "Concat", + ["false_probability", "predictions_in"], + ["predictions_out"], + axis=1, + ) + ) + + graph = make_graph( + nodes, + "mirror predict proba", + [predictions_in], + [predictions_out], + [one], + ) + new_model = make_model( + graph, + opset_imports=[ + make_opsetid("", 21), + ], + ) + new_model = merge_models( + model, + new_model, + io_map=[ + (model.graph.output[0].name, "predictions_in"), + ], + prefix2="mirror_", + ) + check_model(new_model) + return new_model + + def to_onnx(self, X: cn.ndarray, predict_function: str = "predict"): + """Converts the estimator to an ONNX model which is expected to produce + equivalent predictions to `predict_function` up to reasonable floating + point tolerance. The ONNX model is hard coded to the X input data type, + separate models should be generated for float and double. The ONNX model + takes "X_in" as input and produces "predictions_out" as output. + + Parameters + ---------- + X: + Example input data. Use to infer input data characteristics. 
+ A model produced for float32 will not accept float64 input and vice versa. + predict_function : str + The serialised ONNX model can produce output equivalent to 'predict', + 'predict_proba', or 'predict_raw'. + The default is "predict". + Returns + ------- + Any + The ONNX model. + + Examples + -------- + >>> import numpy as np + >>> import legateboost as lb + >>> X = np.random.random((1000, 10)) + >>> y = np.random.randint(0, 2, X.shape[0]) + >>> model = lb.LBClassifier(n_estimators=5).fit(X, y) + >>> import onnxruntime as ort + >>> sess = ort.InferenceSession(model.to_onnx(X, + ... predict_function="predict_proba").SerializeToString()) + >>> onnx_pred = sess.run(None, {"X_in": X})[0] + >>> assert np.allclose(model.predict_proba(X), onnx_pred, atol=1e-6) + >>> + """ + from onnx.checker import check_model + from onnx.compose import merge_models + + if predict_function not in ["predict", "predict_proba", "predict_raw"]: + raise ValueError( + "predict_function should be one of ['predict'," + " 'predict_proba', 'predict_raw']" + ) + if predict_function in ["predict_proba", "predict"]: + model = self._to_onnx_predict_transformed(X) + # need to mirror the output when we only output one target + if self.predict_raw(X[0:1]).shape[1] == 1: + model = self._mirror_predict_proba_output(model) + if predict_function == "predict": + # argmax the predict_proba output + argmax = self._objective_instance.onnx_output_class( + self.predict_proba(X[0:1]) + ) + model = merge_models( + model, + argmax, + io_map=[ + (model.graph.output[0].name, "predictions_in"), + ], + prefix2="classifier_predict_", + ) + + elif predict_function == "predict_raw": + model = self._to_onnx_predict_raw(X) + + # coerce the output shape to be the same as the equivalent predict function + test_pred = getattr(self, predict_function)(X[0:1]) + model = merge_models( + model, + self._make_onnx_reshape_predictions(test_pred), + io_map=[(model.graph.output[0].name, "predictions_in")], + prefix2="reshape_", + 
) + check_model(model) + return model diff --git a/legateboost/objectives.py b/legateboost/objectives.py index 5c59330d..e6d770b0 100644 --- a/legateboost/objectives.py +++ b/legateboost/objectives.py @@ -165,6 +165,54 @@ def output_class(self, pred: cn.ndarray) -> cn.ndarray: """ return cn.argmax(pred, axis=-1) + def onnx_output_class(self, pred: cn.ndarray): + """Returns an ONNX model that accepts + - "predictions_in" : 2D tensor of shape (n_samples, n_outputs) and type double. + And outputs the predicted class labels. + - "predictions_out" : 1D tensor of shape (n_samples,) and type int32. + + Returns: + Onnx model that converts probabilities into class labels. + """ + from onnx import TensorProto + from onnx.checker import check_model + from onnx.helper import ( + make_graph, + make_model, + make_node, + make_opsetid, + make_tensor_value_info, + ) + + predictions_in = make_tensor_value_info( + "predictions_in", + TensorProto.DOUBLE, + [None, None], + ) + predictions_out = make_tensor_value_info( + "predictions_out", + TensorProto.INT64, + [None], + ) + nodes = [] + nodes.append( + make_node( + "ArgMax", ["predictions_in"], ["predictions_out"], axis=-1, keepdims=0 + ) + ) + graph = make_graph( + nodes, + "OutputClass", + [predictions_in], + [predictions_out], + ) + onnx_model = make_model( + graph, + opset_imports=[make_opsetid("", 21)], + ) + check_model(onnx_model) + return onnx_model + class SquaredErrorObjective(BaseObjective): """The Squared Error objective function for regression problems. @@ -848,7 +896,60 @@ def onnx_transform(self, pred: cn.ndarray) -> cn.ndarray: return onnx_model def output_class(self, pred: cn.ndarray) -> cn.ndarray: - return cn.array(pred > 0.5, dtype=cn.int32).squeeze() + return cn.array(pred > 0.5, dtype=cn.int64) + + def onnx_output_class(self, pred: cn.ndarray): + """Returns an ONNX model that accepts + - "predictions_in" : 2D tensor of shape (n_samples, n_outputs) and type double. + And outputs the predicted class labels. 
+ - "predictions_out" : 1D tensor of shape (n_samples,) and type int32. + + Returns: + Onnx model that converts probabilities into class labels. + """ + from onnx import TensorProto, numpy_helper + from onnx.checker import check_model + from onnx.helper import ( + make_graph, + make_model, + make_node, + make_opsetid, + make_tensor_value_info, + ) + + predictions_in = make_tensor_value_info( + "predictions_in", + TensorProto.DOUBLE, + [None, None], + ) + predictions_out = make_tensor_value_info( + "predictions_out", + TensorProto.INT64, + [None], + ) + nodes = [] + half = numpy_helper.from_array(np.array(0.5, dtype=np.float64), name="half") + nodes.append( + make_node("Greater", ["predictions_in", "half"], ["comparison_result"]) + ) + nodes.append( + make_node( + "Cast", ["comparison_result"], ["predictions_out"], to=TensorProto.INT64 + ) + ) + graph = make_graph( + nodes, + "OutputClass", + [predictions_in], + [predictions_out], + [half], + ) + onnx_model = make_model( + graph, + opset_imports=[make_opsetid("", 21)], + ) + check_model(onnx_model) + return onnx_model def metric(self) -> MultiLabelMetric: return MultiLabelMetric() diff --git a/legateboost/test/test_onnx.py b/legateboost/test/test_onnx.py index fc686b1f..2aa0e250 100644 --- a/legateboost/test/test_onnx.py +++ b/legateboost/test/test_onnx.py @@ -34,7 +34,7 @@ def compare_estimator_predictions(estimator, X, predict_function): pred = pred_method(cn.array(X)) onnx_pred = sess.run(None, feeds)[0] - assert onnx_pred.dtype == np.float64 + assert onnx_pred.dtype == pred.dtype assert pred.shape == onnx_pred.shape assert np.allclose( onnx_pred, pred, atol=1e-2 if X.dtype == np.float32 else 1e-6 @@ -145,6 +145,8 @@ def test_classifier(Model, objective, classification_dataset): ).fit(X, y) compare_estimator_predictions(model, X, "predict_raw") + compare_estimator_predictions(model, X, "predict_proba") + compare_estimator_predictions(model, X, "predict") @pytest.mark.parametrize("dtype", [np.float32, np.float64]) 
From 039a8e1ed186431d42c5b415ecaeb150a175fcdf Mon Sep 17 00:00:00 2001 From: Rory Mitchell Date: Fri, 11 Apr 2025 04:44:56 -0700 Subject: [PATCH 13/21] Compensate for tolerance --- legateboost/test/test_onnx.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/legateboost/test/test_onnx.py b/legateboost/test/test_onnx.py index 2aa0e250..3d75300b 100644 --- a/legateboost/test/test_onnx.py +++ b/legateboost/test/test_onnx.py @@ -19,11 +19,11 @@ def compare_model_predictions(model, X): pred = pred.squeeze() assert pred.shape == onnx_pred.shape assert np.allclose( - onnx_pred, pred, atol=1e-3 if X.dtype == np.float32 else 1e-6 + onnx_pred, pred, atol=1e-2 if X.dtype == np.float32 else 1e-6 ), np.linalg.norm(pred - onnx_pred) -def compare_estimator_predictions(estimator, X, predict_function): +def compare_estimator_predictions(estimator, X, predict_function, allowed_wrong=0): sess = ort.InferenceSession( estimator.to_onnx(X, predict_function).SerializeToString() ) @@ -36,9 +36,10 @@ def compare_estimator_predictions(estimator, X, predict_function): assert onnx_pred.dtype == pred.dtype assert pred.shape == onnx_pred.shape - assert np.allclose( - onnx_pred, pred, atol=1e-2 if X.dtype == np.float32 else 1e-6 - ), np.linalg.norm(pred - onnx_pred) + number_wrong = np.sum( + np.abs(pred - onnx_pred) > (1e-2 if X.dtype == np.float32 else 1e-6) + ) + assert number_wrong <= allowed_wrong @pytest.fixture @@ -146,7 +147,11 @@ def test_classifier(Model, objective, classification_dataset): compare_estimator_predictions(model, X, "predict_raw") compare_estimator_predictions(model, X, "predict_proba") - compare_estimator_predictions(model, X, "predict") + # softmax has numerical differences with float32 + # allow a very small number of different class predictions + # this is fine so long as the probabilities are close + allowed_wrong = 5 if y.max() > 1 and X.dtype == np.float32 else 0 + compare_estimator_predictions(model, X, "predict", allowed_wrong) 
@pytest.mark.parametrize("dtype", [np.float32, np.float64]) From aea209cb9fd20122369cda79be17f625a03f00c1 Mon Sep 17 00:00:00 2001 From: Rory Mitchell Date: Fri, 11 Apr 2025 07:14:43 -0700 Subject: [PATCH 14/21] Convert some operators to text --- legateboost/objectives.py | 332 +++++++++++--------------------------- 1 file changed, 95 insertions(+), 237 deletions(-) diff --git a/legateboost/objectives.py b/legateboost/objectives.py index e6d770b0..9819af28 100644 --- a/legateboost/objectives.py +++ b/legateboost/objectives.py @@ -86,39 +86,19 @@ def onnx_transform(self, pred: cn.ndarray) -> cn.ndarray: Returns: Onnx model that transforms the predictions. """ - from onnx import TensorProto - from onnx.checker import check_model - from onnx.helper import ( - make_graph, - make_model, - make_node, - make_opsetid, - make_tensor_value_info, - ) - - predictions_in = make_tensor_value_info( - "predictions_in", - TensorProto.DOUBLE, - [None, None], - ) - predictions_out = make_tensor_value_info( - "predictions_out", - TensorProto.DOUBLE, - [None, None], - ) - nodes = [make_node("Identity", ["predictions_in"], ["predictions_out"])] - graph = make_graph( - nodes, - "BaseModel", - [predictions_in], - [predictions_out], - ) - onnx_model = make_model( - graph, - opset_imports=[make_opsetid("", 21)], - ) - check_model(onnx_model) - return onnx_model + import onnx + + onnx_text = """ + < + ir_version: 9, + opset_import: ["" : 10] + > + BaseObjective (double[N, M] predictions_in) => (double[N, M] predictions_out) + { + predictions_out = Identity(predictions_in) + } + """ + return onnx.parser.parse_model(onnx_text) @abstractmethod def metric(self) -> BaseMetric: @@ -342,91 +322,31 @@ def transform(self, pred: cn.ndarray) -> cn.ndarray: return pred def onnx_transform(self, pred: cn.ndarray) -> cn.ndarray: - from onnx import TensorProto, numpy_helper - from onnx.checker import check_model - from onnx.helper import ( - make_graph, - make_model, - make_node, - make_opsetid, - 
make_tensor_value_info, - ) - - predictions_in = make_tensor_value_info( - "predictions_in", - TensorProto.DOUBLE, - [None, None], - ) - predictions_out = make_tensor_value_info( - "predictions_out", - TensorProto.DOUBLE, - [None, None, 2], - ) - nodes = [] - # clip - mininmum = numpy_helper.from_array( - np.array(-5, dtype=np.float64), name="minimum" - ) - maximum = numpy_helper.from_array(np.array(5, dtype=np.float64), name="maximum") - # reshape - out_shape = numpy_helper.from_array( - np.array([0, -1, 2], dtype=np.int64), name="out_shape" - ) - nodes.append( - make_node("Reshape", ["predictions_in", "out_shape"], ["reshaped"]) - ) - - nodes.append(make_node("Shape", ["reshaped"], ["new_shape"])) - - var_starts = numpy_helper.from_array( - np.array([0, 0, 1], dtype=np.int64), name="var_starts" - ) - mean_starts = numpy_helper.from_array( - np.array([0, 0, 0], dtype=np.int64), name="mean_starts" - ) - - # extract mean and variance parts - axis = numpy_helper.from_array(np.array([0, 1, 2], dtype=np.int64), name="axis") - steps = numpy_helper.from_array( - np.array([1, 1, 2], dtype=np.int64), name="steps" - ) - nodes.append( - make_node( - "Slice", - ["reshaped", "var_starts", "new_shape", "axis", "steps"], - ["variance"], - ) - ) - nodes.append( - make_node( - "Slice", - ["reshaped", "mean_starts", "new_shape", "axis", "steps"], - ["mean"], - ) - ) - nodes.append( - make_node("Clip", ["variance", "minimum", "maximum"], ["clipped_variance"]) - ) - - # combine them again - nodes.append( - make_node( - "Concat", ["mean", "clipped_variance"], ["predictions_out"], axis=2 - ) - ) - graph = make_graph( - nodes, - "NormalObjective", - [predictions_in], - [predictions_out], - [out_shape, var_starts, mean_starts, axis, steps, mininmum, maximum], - ) - onnx_model = make_model( - graph, - opset_imports=[make_opsetid("", 21)], - ) - check_model(onnx_model) - return onnx_model + import onnx + + onnx_text = """ + < + ir_version: 9, + opset_import: ["" : 21] + > + NormalObjective 
(double[N, M] predictions_in) => (double[N, M] predictions_out) + { + out_shape = Constant() + var_starts = Constant() + mean_starts = Constant() + axis = Constant() + steps = Constant() + min = Constant() + max = Constant() + reshaped = Reshape(predictions_in, out_shape) + new_shape = Shape(reshaped) + variance = Slice(reshaped, var_starts, new_shape, axis, steps) + mean = Slice(reshaped, mean_starts, new_shape, axis, steps) + clipped_variance = Clip(variance, min, max) + predictions_out = Concat(mean, clipped_variance) + } + """ + return onnx.parser.parse_model(onnx_text) @override def mean(self, param: cn.ndarray) -> cn.ndarray: @@ -783,44 +703,21 @@ def transform(self, pred: cn.ndarray) -> cn.ndarray: return e_x / div[:, cn.newaxis] def onnx_transform(self, pred: cn.ndarray) -> cn.ndarray: - from onnx import TensorProto - from onnx.checker import check_model - from onnx.helper import ( - make_graph, - make_model, - make_node, - make_opsetid, - make_tensor_value_info, - ) - - predictions_in = make_tensor_value_info( - "predictions_in", - TensorProto.DOUBLE, - [None, None], - ) - predictions_out = make_tensor_value_info( - "predictions_out", - TensorProto.DOUBLE, - [None, None], - ) - nodes = [] - if pred.shape[1] == 1: - nodes.append(make_node("Sigmoid", ["predictions_in"], ["predictions_out"])) - else: - nodes.append(make_node("Softmax", ["predictions_in"], ["predictions_out"])) - graph = make_graph( - nodes, - "LogLossObjective", - [predictions_in], - [predictions_out], - [], - ) - onnx_model = make_model( - graph, - opset_imports=[make_opsetid("", 21)], - ) - check_model(onnx_model) - return onnx_model + import onnx + + operator_to_use = "Sigmoid" if pred.shape[1] == 1 else "Softmax" + onnx_text = f""" + < + ir_version: 9, + opset_import: ["" : 10] + > + LogLossObjective (double[N, M] predictions_in) => (double[N, M] predictions_out) + {{ + predictions_out = {operator_to_use}(predictions_in) + }} + """ + print(onnx_text) + return 
onnx.parser.parse_model(onnx_text) def metric(self) -> LogLossMetric: return LogLossMetric() @@ -859,41 +756,19 @@ def transform(self, pred: cn.ndarray) -> cn.ndarray: return self.one / (self.one + cn.exp(-pred)) def onnx_transform(self, pred: cn.ndarray) -> cn.ndarray: - from onnx import TensorProto - from onnx.checker import check_model - from onnx.helper import ( - make_graph, - make_model, - make_node, - make_opsetid, - make_tensor_value_info, - ) - - predictions_in = make_tensor_value_info( - "predictions_in", - TensorProto.DOUBLE, - [None, None], - ) - predictions_out = make_tensor_value_info( - "predictions_out", - TensorProto.DOUBLE, - [None, None], - ) - nodes = [] - nodes.append(make_node("Sigmoid", ["predictions_in"], ["predictions_out"])) - graph = make_graph( - nodes, - "MultiLabelObjective", - [predictions_in], - [predictions_out], - [], - ) - onnx_model = make_model( - graph, - opset_imports=[make_opsetid("", 21)], - ) - check_model(onnx_model) - return onnx_model + import onnx + + onnx_text = """ + < + ir_version: 9, + opset_import: ["" : 10] + > + MultiLabelObjective (double[N, M] predictions_in) => (double[N, M] predictions_out) + { + predictions_out = Sigmoid(predictions_in) + } + """ # noqa: E501 + return onnx.parser.parse_model(onnx_text) def output_class(self, pred: cn.ndarray) -> cn.ndarray: return cn.array(pred > 0.5, dtype=cn.int64) @@ -1016,54 +891,37 @@ def transform(self, pred: cn.ndarray) -> cn.ndarray: return logloss.transform((1 / (K - 1)) * pred) def onnx_transform(self, pred: cn.ndarray) -> cn.ndarray: - from onnx import TensorProto, numpy_helper - from onnx.checker import check_model - from onnx.helper import ( - make_graph, - make_model, - make_node, - make_opsetid, - make_tensor_value_info, - ) + import onnx - predictions_in = make_tensor_value_info( - "predictions_in", - TensorProto.DOUBLE, - [None, None], - ) - predictions_out = make_tensor_value_info( - "predictions_out", - TensorProto.DOUBLE, - [None, None], - ) - - nodes = 
[] - initializers = [] if pred.shape[1] == 1: - two = numpy_helper.from_array(np.array(2, dtype=np.float64), name="two") - nodes.append(make_node("Mul", ["predictions_in", "two"], ["multiplied"])) - nodes.append(make_node("Sigmoid", ["multiplied"], ["predictions_out"])) - initializers.append(two) - else: - constant = numpy_helper.from_array( - np.array(1 / (pred.shape[1] - 1), dtype=np.float64), name="constant" - ) - nodes.append(make_node("Mul", ["predictions_in", "constant"], ["scaled"])) - nodes.append(make_node("Softmax", ["scaled"], ["predictions_out"])) - initializers.append(constant) - graph = make_graph( - nodes, - "ExpObjective", - [predictions_in], - [predictions_out], - initializers, - ) - onnx_model = make_model( - graph, - opset_imports=[make_opsetid("", 21)], - ) - check_model(onnx_model) - return onnx_model + onnx_text = """ + < + ir_version: 9, + opset_import: ["" : 10] + > + LogLossObjective (double[N, M] predictions_in) => (double[N, M] predictions_out) + { + constant = Constant() + a = Mul(predictions_in, constant) + predictions_out = Sigmoid(a) + } + """ # noqa: E501 + return onnx.parser.parse_model(onnx_text) + + constant = 1 / (pred.shape[1] - 1) + onnx_text_multiclass = f""" + < + ir_version: 9, + opset_import: ["" : 10] + > + LogLossObjective (double[N, M] predictions_in) => (double[N, M] predictions_out) + {{ + constant = Constant() + a = Mul(predictions_in, constant) + predictions_out = Softmax(a) + }} + """ + return onnx.parser.parse_model(onnx_text_multiclass) def metric(self) -> ExponentialMetric: return ExponentialMetric() From b069090aaf9535b556faab81767ba76a19410c08 Mon Sep 17 00:00:00 2001 From: Rory Mitchell Date: Thu, 17 Apr 2025 02:59:15 -0700 Subject: [PATCH 15/21] Reduce verbosity --- legateboost/legateboost.py | 131 ++++++------------- legateboost/models/krr.py | 130 ++++++------------ legateboost/models/linear.py | 87 ++++--------- legateboost/objectives.py | 239 +++++++++------------------------- 
legateboost/test/test_onnx.py | 7 +- 5 files changed, 169 insertions(+), 425 deletions(-) diff --git a/legateboost/legateboost.py b/legateboost/legateboost.py index e6dd45e6..6986cea2 100644 --- a/legateboost/legateboost.py +++ b/legateboost/legateboost.py @@ -561,101 +561,50 @@ def dump_models(self) -> str: return text def _make_onnx_reshape_predictions(self, pred: cn.ndarray) -> cn.ndarray: - from onnx import TensorProto, numpy_helper - from onnx.checker import check_model - from onnx.helper import ( - make_graph, - make_model, - make_node, - make_opsetid, - make_tensor_value_info, - np_dtype_to_tensor_dtype, - ) - # make an onnx model that shapes the predictions equivalently to pred - extra_out_shape = [] if pred.ndim == 1 else list(pred.shape[1:]) - shape = numpy_helper.from_array(np.array([-1] + extra_out_shape), name="shape") - - predictions_in = make_tensor_value_info( - "predictions_in", - TensorProto.DOUBLE, - [None, None], - ) - predictions_out = make_tensor_value_info( - "predictions_out", - np_dtype_to_tensor_dtype(pred.dtype), - shape=[None] + list(extra_out_shape), - ) - nodes = [ - make_node( - "Reshape", - ["predictions_in", "shape"], - ["predictions_out"], - ) - ] - graph = make_graph( - nodes, - "reshape output", - [predictions_in], - [predictions_out], - [shape], - ) - model = make_model(graph, opset_imports=[make_opsetid("", 21)]) - check_model(model) - return model + shape = list(pred.shape) + shape[0] = -1 + out_type = "int64" if pred.dtype == cn.int64 else "double" + import onnx + + onnx_text = f""" + < + ir_version: 10, + opset_import: ["" : 21] + > + ReshapePredictions ({out_type}[N, M] predictions_in) => ({out_type}{shape} predictions_out) + {{ + shape = Constant() + predictions_out = Reshape(predictions_in, shape) + }} + """ # noqa: E501 + return onnx.parser.parse_model(onnx_text) def _make_onnx_init(self, X_dtype): - # turn self.model_init_ into an ONNX model - from onnx import TensorProto, numpy_helper - from onnx.checker import 
check_model - from onnx.helper import ( - make_graph, - make_model, - make_node, - make_opsetid, - make_tensor_value_info, - np_dtype_to_tensor_dtype, - ) - - # model constants - X_in = make_tensor_value_info( - "X_in", np_dtype_to_tensor_dtype(X_dtype), [None, self.n_features_in_] - ) - nodes = [] - nodes.append(make_node("Shape", ["X_in"], ["n_rows"], end=1)) - one = numpy_helper.from_array(np.array([1], dtype=np.int64), name="one") - nodes.append(make_node("Concat", ["n_rows", "one"], ["tile_repeat"], axis=0)) - init = numpy_helper.from_array( - np.atleast_2d(self.model_init_.__array__()), name="init" - ) - prediction_out = make_tensor_value_info( - "predictions_out", - TensorProto.DOUBLE, - [None, self.model_init_.shape[0]], - ) - nodes.append(make_node("Tile", ["init", "tile_repeat"], ["predictions_out"])) - X_out = make_tensor_value_info( - "X_out", - np_dtype_to_tensor_dtype(X_dtype), - [None, None], - ) - nodes.append(make_node("Identity", ["X_in"], ["X_out"])) - graph = make_graph( - nodes, - "legateboost estimator init", - [X_in], - [X_out, prediction_out], - [init, one], - ) - onnx_model = make_model( - graph, - opset_imports=[ - make_opsetid("", 21), - ], + import onnx + + X_type_text = "double" if X_dtype == cn.float64 else "float" + onnx_text = f""" + < + ir_version: 10, + opset_import: ["" : 21] + > + ReshapePredictions ({X_type_text}[N, M] X_in) => ({X_type_text}[N, M] X_out, double[N, K] predictions_out) + {{ + X_out = Identity(X_in) + n_rows = Shape(X_in) + one = Constant() + tile_repeat = Concat(n_rows, one) + predictions_out = Tile(init, tile_repeat) + }} + """ # noqa: E501 + init_model = onnx.parser.parse_model(onnx_text) + init_model.graph.initializer.append( + onnx.numpy_helper.from_array( + np.atleast_2d(self.model_init_.__array__()), name="init" + ) ) - check_model(onnx_model) - - return onnx_model + return init_model def _to_onnx_predict_raw(self, X: cn.ndarray): from onnx.checker import check_model diff --git a/legateboost/models/krr.py 
b/legateboost/models/krr.py index da2e22ce..5ecaaebc 100644 --- a/legateboost/models/krr.py +++ b/legateboost/models/krr.py @@ -244,97 +244,41 @@ def __mul__(self, scalar: Any) -> "KRR": return new def to_onnx(self, X) -> Any: - from onnx import TensorProto, numpy_helper - from onnx.checker import check_model - from onnx.helper import ( - make_graph, - make_model, - make_node, - make_opsetid, - make_tensor_value_info, - np_dtype_to_tensor_dtype, + import onnx + + X_type_text = "double" if X.dtype == cn.float64 else "float" + denominator = -2.0 * self.sigma**2 + onnx_text = f""" + < + ir_version: 10, + opset_import: ["" : 21] + > + KRRModel ({X_type_text}[N, M] X_in, double[N, K] predictions_in) => ({X_type_text}[N, M] X_out, double[N, K] predictions_out) + {{ + X_out = Identity(X_in) + axis1 = Constant() + XX = ReduceSumSquare(X_in, axis1) + XY = Gemm(X_in, X_train) + YY = ReduceSumSquare(X_train, axis1) + reshape = Constant() + YY_reshaped = Reshape(YY, reshape) + add0 = Add(XX, XY) + l2 = Add(YY_reshaped, add0) + zero = Constant() + l2_clipped = Max(l2, zero) + denominator = Constant() + rbf0 = Div(l2_clipped, denominator) + K = Exp(rbf0) + dot = MatMul(K, betas) + dot_double = Cast(dot) + predictions_out = Add(dot_double, predictions_in) + }} + """ # noqa: E501 + model = onnx.parser.parse_model(onnx_text) + model.graph.initializer.extend( + [ + onnx.numpy_helper.from_array(self.betas_.__array__(), name="betas"), + onnx.numpy_helper.from_array(self.X_train.__array__(), name="X_train"), + ] ) - - assert self.X_train.dtype == self.betas_.dtype - - nodes = [] - - # model constants - betas = numpy_helper.from_array(self.betas_.__array__(), name="betas") - X_train = numpy_helper.from_array(self.X_train.__array__(), name="X_train") - - # pred inputs - n_features = self.X_train.shape[1] - n_outputs = self.betas_.shape[1] - X_in = make_tensor_value_info( - "X_in", np_dtype_to_tensor_dtype(self.betas_.dtype), [None, n_features] - ) - predictions_in = 
make_tensor_value_info( - "predictions_in", - TensorProto.DOUBLE, - [None, n_outputs], - ) - # exanded l2 distance - # distance = np.sum(X**2, axis=1)[:, np.newaxis] - 2 * np.dot(X, self.X_train.T) - # + np.sum(self.X_train**2, axis=1) - axis1 = numpy_helper.from_array(np.array([1]), name="axis1") - nodes.append(make_node("ReduceSumSquare", ["X_in", "axis1"], ["XX"])) - nodes.append( - make_node("Gemm", ["X_in", "X_train"], ["XY"], alpha=-2.0, transB=1) - ) - nodes.append(make_node("ReduceSumSquare", ["X_train", "axis1"], ["YY"])) - reshape = numpy_helper.from_array( - np.array([1, -1], dtype=np.int64), name="reshape" - ) - nodes.append(make_node("Reshape", ["YY", "reshape"], ["YY_reshaped"])) - nodes.append(make_node("Add", ["XX", "XY"], ["add0"])) - nodes.append(make_node("Add", ["YY_reshaped", "add0"], ["l2"])) - zero = numpy_helper.from_array(np.array([0.0], self.X_train.dtype), name="zero") - nodes.append(make_node("Max", ["l2", "zero"], ["l2_clipped"])) - - # RBF kernel - # K = np.exp(-distance / (2 * self.sigma**2)) - if self.sigma is None: - raise ValueError("sigma is None. 
Has fit been called?") - - denominator = numpy_helper.from_array( - np.array([-2.0 * self.sigma**2], self.X_train.dtype), name="denominator" - ) - nodes.append(make_node("Div", ["l2_clipped", "denominator"], ["rbf0"])) - nodes.append(make_node("Exp", ["rbf0"], ["K"])) - - # prediction - # pred = np.dot(K, self.betas_) - nodes.append(make_node("MatMul", ["K", "betas"], ["dot"])) - - # outputs - predictions_out = make_tensor_value_info( - "predictions_out", - TensorProto.DOUBLE, - [None, n_outputs], - ) - X_out = make_tensor_value_info( - "X_out", np_dtype_to_tensor_dtype(self.betas_.dtype), [None, n_features] - ) - - nodes.append(make_node("Cast", ["dot"], ["dot_double"], to=TensorProto.DOUBLE)) - nodes.append( - make_node("Add", ["dot_double", "predictions_in"], ["predictions_out"]) - ) - nodes.append(make_node("Identity", ["X_in"], ["X_out"])) - - graph = make_graph( - nodes, - "legateboost.model.KRR", - [X_in, predictions_in], - [X_out, predictions_out], - [betas, X_train, axis1, reshape, zero, denominator], - ) - onnx_model = make_model( - graph, - opset_imports=[ - make_opsetid("", 21), - ], - ) - check_model(onnx_model) - return onnx_model + return model diff --git a/legateboost/models/linear.py b/legateboost/models/linear.py index de88ef2c..edb30e61 100644 --- a/legateboost/models/linear.py +++ b/legateboost/models/linear.py @@ -153,65 +153,30 @@ def __mul__(self, scalar: Any) -> "Linear": return new def to_onnx(self, X) -> Any: - from onnx import TensorProto, numpy_helper - from onnx.checker import check_model - from onnx.helper import ( - make_graph, - make_model, - make_node, - make_opsetid, - make_tensor_value_info, - np_dtype_to_tensor_dtype, + import onnx + + X_type_text = "double" if X.dtype == cn.float64 else "float" + onnx_text = f""" + < + ir_version: 10, + opset_import: ["" : 21] + > + LinearModel ({X_type_text}[N, M] X_in, double[N, K] predictions_in) => ({X_type_text}[N, M] X_out, double[N, K] predictions_out) + {{ + X_out = Identity(X_in) + mult 
= MatMul(X_in, betas) + result = Add(mult, intercept) + result_double = Cast(result) + predictions_out = Add(result_double, predictions_in) + }} + """ # noqa: E501 + model = onnx.parser.parse_model(onnx_text) + model.graph.initializer.extend( + [ + onnx.numpy_helper.from_array(self.betas_[1:].__array__(), name="betas"), + onnx.numpy_helper.from_array( + self.betas_[0].__array__(), name="intercept" + ), + ] ) - - # model constants - betas = numpy_helper.from_array(self.betas_[1:].__array__(), name="betas") - intercept = numpy_helper.from_array( - self.betas_[0].__array__(), name="intercept" - ) - - # pred inputs - n_features = self.betas_.shape[0] - 1 - n_outputs = self.betas_.shape[1] - X_in = make_tensor_value_info( - "X_in", np_dtype_to_tensor_dtype(self.betas_.dtype), [None, n_features] - ) - predictions_in = make_tensor_value_info( - "predictions_in", - TensorProto.DOUBLE, - [None, n_outputs], - ) - predictions_out = make_tensor_value_info( - "predictions_out", - TensorProto.DOUBLE, - [None, n_outputs], - ) - - nodes = [] - nodes.append(make_node("MatMul", ["X_in", "betas"], ["XBeta"])) - nodes.append(make_node("Add", ["XBeta", "intercept"], ["result"])) - nodes.append( - make_node("Cast", ["result"], ["result_double"], to=TensorProto.DOUBLE) - ) - nodes.append( - make_node("Add", ["result_double", "predictions_in"], ["predictions_out"]) - ) - X_out = make_tensor_value_info( - "X_out", np_dtype_to_tensor_dtype(self.betas_.dtype), [None, n_features] - ) - nodes.append(make_node("Identity", ["X_in"], ["X_out"])) - graph = make_graph( - nodes, - "legateboost.model.Linear", - [X_in, predictions_in], - [X_out, predictions_out], - [betas, intercept], - ) - onnx_model = make_model( - graph, - opset_imports=[ - make_opsetid("", 21), - ], - ) - check_model(onnx_model) - return onnx_model + return model diff --git a/legateboost/objectives.py b/legateboost/objectives.py index 9819af28..d2607d2a 100644 --- a/legateboost/objectives.py +++ b/legateboost/objectives.py @@ -1,7 
+1,6 @@ from abc import ABC, abstractmethod from typing import Tuple -import numpy as np from scipy.stats import norm from typing_extensions import TypeAlias, override @@ -90,8 +89,8 @@ def onnx_transform(self, pred: cn.ndarray) -> cn.ndarray: onnx_text = """ < - ir_version: 9, - opset_import: ["" : 10] + ir_version: 10, + opset_import: ["" : 21] > BaseObjective (double[N, M] predictions_in) => (double[N, M] predictions_out) { @@ -154,44 +153,19 @@ def onnx_output_class(self, pred: cn.ndarray): Returns: Onnx model that converts probabilities into class labels. """ - from onnx import TensorProto - from onnx.checker import check_model - from onnx.helper import ( - make_graph, - make_model, - make_node, - make_opsetid, - make_tensor_value_info, - ) + import onnx - predictions_in = make_tensor_value_info( - "predictions_in", - TensorProto.DOUBLE, - [None, None], - ) - predictions_out = make_tensor_value_info( - "predictions_out", - TensorProto.INT64, - [None], - ) - nodes = [] - nodes.append( - make_node( - "ArgMax", ["predictions_in"], ["predictions_out"], axis=-1, keepdims=0 - ) - ) - graph = make_graph( - nodes, - "OutputClass", - [predictions_in], - [predictions_out], - ) - onnx_model = make_model( - graph, - opset_imports=[make_opsetid("", 21)], - ) - check_model(onnx_model) - return onnx_model + onnx_text = """ + < + ir_version: 10, + opset_import: ["" : 21] + > + BaseModelOutputClass (double[N, M] predictions_in) => (double[N, M] predictions_out) + { + predictions_out = ArgMax(predictions_in) + } + """ # noqa: E501 + return onnx.parser.parse_model(onnx_text) class SquaredErrorObjective(BaseObjective): @@ -326,7 +300,7 @@ def onnx_transform(self, pred: cn.ndarray) -> cn.ndarray: onnx_text = """ < - ir_version: 9, + ir_version: 10, opset_import: ["" : 21] > NormalObjective (double[N, M] predictions_in) => (double[N, M] predictions_out) @@ -428,42 +402,19 @@ def transform(self, pred: cn.ndarray) -> cn.ndarray: @override def onnx_transform(self, pred: cn.ndarray) -> 
cn.ndarray: - from onnx import TensorProto - from onnx.checker import check_model - from onnx.helper import ( - make_graph, - make_model, - make_node, - make_opsetid, - make_tensor_value_info, - ) + import onnx - predictions_in = make_tensor_value_info( - "predictions_in", - TensorProto.DOUBLE, - [None, None], - ) - predictions_out = make_tensor_value_info( - "predictions_out", - TensorProto.DOUBLE, - [None, None], - ) - nodes = [] - # exp - nodes.append(make_node("Exp", ["predictions_in"], ["predictions_out"])) - - graph = make_graph( - nodes, - "GammaDevianceObjective", - [predictions_in], - [predictions_out], - ) - onnx_model = make_model( - graph, - opset_imports=[make_opsetid("", 21)], - ) - check_model(onnx_model) - return onnx_model + onnx_text = """ + < + ir_version: 10, + opset_import: ["" : 21] + > + GammaDevianceTransform (double[N, M] predictions_in) => (double[N, M] predictions_out) + { + predictions_out = Exp(predictions_in) + } + """ # noqa: E501 + return onnx.parser.parse_model(onnx_text) def initialise_prediction( self, y: cn.ndarray, w: cn.ndarray, boost_from_average: bool @@ -511,50 +462,19 @@ def transform(self, pred: cn.ndarray) -> cn.ndarray: @override def onnx_transform(self, pred: cn.ndarray) -> cn.ndarray: - from onnx import TensorProto, numpy_helper - from onnx.checker import check_model - from onnx.helper import ( - make_graph, - make_model, - make_node, - make_opsetid, - make_tensor_value_info, - ) + import onnx - predictions_in = make_tensor_value_info( - "predictions_in", - TensorProto.DOUBLE, - [None, None], - ) - predictions_out = make_tensor_value_info( - "predictions_out", - TensorProto.DOUBLE, - [None, None, 2], - ) - nodes = [] - # reshape - out_shape = numpy_helper.from_array( - np.array([0, -1, 2], dtype=np.int64), name="out_shape" - ) - nodes.append( - make_node("Reshape", ["predictions_in", "out_shape"], ["reshaped"]) - ) - # exp - nodes.append(make_node("Exp", ["reshaped"], ["predictions_out"])) - - graph = make_graph( - 
nodes, - "GammaObjective", - [predictions_in], - [predictions_out], - [out_shape], - ) - onnx_model = make_model( - graph, - opset_imports=[make_opsetid("", 21)], - ) - check_model(onnx_model) - return onnx_model + onnx_text = """ + < + ir_version: 10, + opset_import: ["" : 21] + > + GammaTransform (double[N, M] predictions_in) => (double[N, M] predictions_out) + { + predictions_out = Exp(predictions_in) + } + """ + return onnx.parser.parse_model(onnx_text) @override def metric(self) -> GammaLLMetric: @@ -708,15 +628,14 @@ def onnx_transform(self, pred: cn.ndarray) -> cn.ndarray: operator_to_use = "Sigmoid" if pred.shape[1] == 1 else "Softmax" onnx_text = f""" < - ir_version: 9, - opset_import: ["" : 10] + ir_version: 10, + opset_import: ["" : 21] > LogLossObjective (double[N, M] predictions_in) => (double[N, M] predictions_out) {{ predictions_out = {operator_to_use}(predictions_in) }} """ - print(onnx_text) return onnx.parser.parse_model(onnx_text) def metric(self) -> LogLossMetric: @@ -760,8 +679,8 @@ def onnx_transform(self, pred: cn.ndarray) -> cn.ndarray: onnx_text = """ < - ir_version: 9, - opset_import: ["" : 10] + ir_version: 10, + opset_import: ["" : 21] > MultiLabelObjective (double[N, M] predictions_in) => (double[N, M] predictions_out) { @@ -774,57 +693,21 @@ def output_class(self, pred: cn.ndarray) -> cn.ndarray: return cn.array(pred > 0.5, dtype=cn.int64) def onnx_output_class(self, pred: cn.ndarray): - """Returns an ONNX model that accepts - - "predictions_in" : 2D tensor of shape (n_samples, n_outputs) and type double. - And outputs the predicted class labels. - - "predictions_out" : 1D tensor of shape (n_samples,) and type int32. - - Returns: - Onnx model that converts probabilities into class labels. 
- """ - from onnx import TensorProto, numpy_helper - from onnx.checker import check_model - from onnx.helper import ( - make_graph, - make_model, - make_node, - make_opsetid, - make_tensor_value_info, - ) + import onnx - predictions_in = make_tensor_value_info( - "predictions_in", - TensorProto.DOUBLE, - [None, None], - ) - predictions_out = make_tensor_value_info( - "predictions_out", - TensorProto.INT64, - [None], - ) - nodes = [] - half = numpy_helper.from_array(np.array(0.5, dtype=np.float64), name="half") - nodes.append( - make_node("Greater", ["predictions_in", "half"], ["comparison_result"]) - ) - nodes.append( - make_node( - "Cast", ["comparison_result"], ["predictions_out"], to=TensorProto.INT64 - ) - ) - graph = make_graph( - nodes, - "OutputClass", - [predictions_in], - [predictions_out], - [half], - ) - onnx_model = make_model( - graph, - opset_imports=[make_opsetid("", 21)], - ) - check_model(onnx_model) - return onnx_model + onnx_text = """ + < + ir_version: 10, + opset_import: ["" : 21] + > + MultiLabelOutputClass (double[N, M] predictions_in) => (double[N, M] predictions_out) + { + half = Constant() + greater = Greater(predictions_in, half) + predictions_out = Cast(greater) + } + """ # noqa: E501 + return onnx.parser.parse_model(onnx_text) def metric(self) -> MultiLabelMetric: return MultiLabelMetric() @@ -896,8 +779,8 @@ def onnx_transform(self, pred: cn.ndarray) -> cn.ndarray: if pred.shape[1] == 1: onnx_text = """ < - ir_version: 9, - opset_import: ["" : 10] + ir_version: 10, + opset_import: ["" : 21] > LogLossObjective (double[N, M] predictions_in) => (double[N, M] predictions_out) { @@ -911,8 +794,8 @@ def onnx_transform(self, pred: cn.ndarray) -> cn.ndarray: constant = 1 / (pred.shape[1] - 1) onnx_text_multiclass = f""" < - ir_version: 9, - opset_import: ["" : 10] + ir_version: 10, + opset_import: ["" : 21] > LogLossObjective (double[N, M] predictions_in) => (double[N, M] predictions_out) {{ diff --git a/legateboost/test/test_onnx.py 
b/legateboost/test/test_onnx.py index 3d75300b..89f5a377 100644 --- a/legateboost/test/test_onnx.py +++ b/legateboost/test/test_onnx.py @@ -37,9 +37,12 @@ def compare_estimator_predictions(estimator, X, predict_function, allowed_wrong= assert onnx_pred.dtype == pred.dtype assert pred.shape == onnx_pred.shape number_wrong = np.sum( - np.abs(pred - onnx_pred) > 1e-2 if X.dtype == np.float32 else 1e-6 + np.abs(pred - onnx_pred) > (1e-2 if X.dtype == np.float32 else 1e-5) + ) + assert number_wrong <= allowed_wrong, ( + np.linalg.norm(pred - onnx_pred), + number_wrong, ) - assert number_wrong <= allowed_wrong @pytest.fixture From f4b7b81e6ee9f4c0bfeabe9aa7d61dd69a209eac Mon Sep 17 00:00:00 2001 From: Rory Mitchell Date: Tue, 22 Apr 2025 04:45:42 -0700 Subject: [PATCH 16/21] Refactor and add typing annotations --- legateboost/legateboost.py | 201 ++++++------------------------- legateboost/models/base_model.py | 10 +- legateboost/models/krr.py | 13 +- legateboost/models/linear.py | 12 +- legateboost/models/nn.py | 14 +-- legateboost/models/tree.py | 16 +-- legateboost/objectives.py | 82 ++++--------- legateboost/onnx_utils.py | 104 ++++++++++++++++ legateboost/test/test_onnx.py | 10 +- 9 files changed, 188 insertions(+), 274 deletions(-) create mode 100644 legateboost/onnx_utils.py diff --git a/legateboost/legateboost.py b/legateboost/legateboost.py index 6986cea2..4899ec42 100644 --- a/legateboost/legateboost.py +++ b/legateboost/legateboost.py @@ -17,6 +17,13 @@ from .metrics import BaseMetric, metrics from .models import BaseModel, Tree from .objectives import OBJECTIVES_MAP, BaseObjective +from .onnx_utils import ( + init_predictions, + make_model, + merge_model_graphs, + mirror_predict_proba_output, + reshape_predictions, +) from .shapley import global_shapley_attributions, local_shapley_attributions from .utils import AddableMixin, AddMember, PickleCupynumericMixin @@ -560,87 +567,17 @@ def dump_models(self) -> str: text += str(m) return text - def 
_make_onnx_reshape_predictions(self, pred: cn.ndarray) -> cn.ndarray: - # make an onnx model that shapes the predictions equivalently to pred - shape = list(pred.shape) - shape[0] = -1 - out_type = "int64" if pred.dtype == cn.int64 else "double" - import onnx - - onnx_text = f""" - < - ir_version: 10, - opset_import: ["" : 21] - > - ReshapePredictions ({out_type}[N, M] predictions_in) => ({out_type}{shape} predictions_out) - {{ - shape = Constant() - predictions_out = Reshape(predictions_in, shape) - }} - """ # noqa: E501 - return onnx.parser.parse_model(onnx_text) - - def _make_onnx_init(self, X_dtype): - import onnx - - X_type_text = "double" if X_dtype == cn.float64 else "float" - onnx_text = f""" - < - ir_version: 10, - opset_import: ["" : 21] - > - ReshapePredictions ({X_type_text}[N, M] X_in) => ({X_type_text}[N, M] X_out, double[N, K] predictions_out) - {{ - X_out = Identity(X_in) - n_rows = Shape(X_in) - one = Constant() - tile_repeat = Concat(n_rows, one) - predictions_out = Tile(init, tile_repeat) - }} - """ # noqa: E501 - init_model = onnx.parser.parse_model(onnx_text) - init_model.graph.initializer.append( - onnx.numpy_helper.from_array( - np.atleast_2d(self.model_init_.__array__()), name="init" - ) - ) - return init_model - - def _to_onnx_predict_raw(self, X: cn.ndarray): - from onnx.checker import check_model - from onnx.compose import merge_models - - model = self._make_onnx_init(X.dtype) - if self.models_ is not None and len(self.models_) > 0: - model = merge_models( - model, - self.models_[0].to_onnx(X), - io_map=[("X_out", "X_in"), ("predictions_out", "predictions_in")], - prefix2="model_0_", - ) - - for i in range(1, len(self.models_)): - model = merge_models( - model, - self.models_[i].to_onnx(X), - io_map=[ - ("model_{}_X_out".format(i - 1), "X_in"), - ("model_{}_predictions_out".format(i - 1), "predictions_in"), - ], - prefix2="model_{}_".format(i), - ) - + def _to_onnx_predict_raw(self, X: cn.ndarray) -> Any: + init_graph = 
init_predictions(self.model_init_, X.dtype) + graph = merge_model_graphs([init_graph] + [m.to_onnx(X) for m in self.models_]) # remove the X_out output, we only need the predictions - model.graph.output.remove(model.graph.output[0]) + graph.output.remove(graph.output[0]) + return graph - check_model(model) - return model - - def _to_onnx_predict_transformed(self, X: cn.ndarray): - from onnx.checker import check_model - from onnx.compose import merge_models + def _to_onnx_predict_transformed(self, X: cn.ndarray) -> Any: + import onnx - model = merge_models( + graph = onnx.compose.merge_graphs( self._to_onnx_predict_raw(X), self._objective_instance.onnx_transform(self.predict_raw(X[0:1])), io_map=[ @@ -651,8 +588,7 @@ def _to_onnx_predict_transformed(self, X: cn.ndarray): ], prefix2="transform_", ) - check_model(model) - return model + return graph def global_attributions( self, @@ -941,7 +877,7 @@ def predict(self, X: cn.ndarray) -> cn.ndarray: pred = pred.squeeze(axis=1) return pred - def to_onnx(self, X: cn.ndarray, predict_function: str = "predict"): + def to_onnx(self, X: cn.ndarray, predict_function: str = "predict") -> Any: """Converts the estimator to an ONNX model which is expected to produce equivalent predictions to `predict_function` up to reasonable floating point tolerance. 
The ONNX model is hard coded to the X input data type, @@ -975,27 +911,22 @@ def to_onnx(self, X: cn.ndarray, predict_function: str = "predict"): >>> assert np.allclose(model.predict(X), onnx_pred, atol=1e-6) >>> """ - from onnx.checker import check_model - from onnx.compose import merge_models + import onnx if predict_function not in ["predict", "predict_raw"]: raise ValueError( "predict_function should be one of ['predict', 'predict_raw']" ) if predict_function == "predict": - model = self._to_onnx_predict_transformed(X) + graph = self._to_onnx_predict_transformed(X) else: - model = self._to_onnx_predict_raw(X) + graph = self._to_onnx_predict_raw(X) # coerce the output shape to be the same as the equivalent predict function test_pred = getattr(self, predict_function)(X[0:1]) - model = merge_models( - model, - self._make_onnx_reshape_predictions(test_pred), - io_map=[(model.graph.output[0].name, "predictions_in")], - prefix2="reshape_", - ) - check_model(model) + graph = reshape_predictions(graph, test_pred) + model = make_model(graph) + onnx.checker.check_model(model, full_check=True) return model @@ -1244,66 +1175,7 @@ def predict(self, X: cn.ndarray) -> cn.ndarray: check_is_fitted(self) return self._objective_instance.output_class(self.predict_proba(X)) - def _mirror_predict_proba_output(self, model) -> cn.ndarray: - assert len(self.classes_) == 2 - from onnx import TensorProto, numpy_helper - from onnx.checker import check_model - from onnx.compose import merge_models - from onnx.helper import ( - make_graph, - make_model, - make_node, - make_opsetid, - make_tensor_value_info, - ) - - nodes = [] - predictions_in = make_tensor_value_info( - "predictions_in", - TensorProto.DOUBLE, - [None, None], - ) - predictions_out = make_tensor_value_info( - "predictions_out", - TensorProto.DOUBLE, - [None, 2], - ) - one = numpy_helper.from_array(np.array([1.0], dtype=np.float64), name="one") - nodes.append(make_node("Sub", ["one", "predictions_in"], ["false_probability"])) 
- nodes.append( - make_node( - "Concat", - ["false_probability", "predictions_in"], - ["predictions_out"], - axis=1, - ) - ) - - graph = make_graph( - nodes, - "mirror predict proba", - [predictions_in], - [predictions_out], - [one], - ) - new_model = make_model( - graph, - opset_imports=[ - make_opsetid("", 21), - ], - ) - new_model = merge_models( - model, - new_model, - io_map=[ - (model.graph.output[0].name, "predictions_in"), - ], - prefix2="mirror_", - ) - check_model(new_model) - return new_model - - def to_onnx(self, X: cn.ndarray, predict_function: str = "predict"): + def to_onnx(self, X: cn.ndarray, predict_function: str = "predict") -> Any: """Converts the estimator to an ONNX model which is expected to produce equivalent predictions to `predict_function` up to reasonable floating point tolerance. The ONNX model is hard coded to the X input data type, @@ -1338,8 +1210,7 @@ def to_onnx(self, X: cn.ndarray, predict_function: str = "predict"): >>> assert np.allclose(model.predict_proba(X), onnx_pred, atol=1e-6) >>> """ - from onnx.checker import check_model - from onnx.compose import merge_models + import onnx if predict_function not in ["predict", "predict_proba", "predict_raw"]: raise ValueError( @@ -1347,34 +1218,30 @@ def to_onnx(self, X: cn.ndarray, predict_function: str = "predict"): " 'predict_proba', 'predict_raw']" ) if predict_function in ["predict_proba", "predict"]: - model = self._to_onnx_predict_transformed(X) + graph = self._to_onnx_predict_transformed(X) # need to mirror the output when we only output one target if self.predict_raw(X[0:1]).shape[1] == 1: - model = self._mirror_predict_proba_output(model) + graph = mirror_predict_proba_output(graph) if predict_function == "predict": # argmax the predict_proba output argmax = self._objective_instance.onnx_output_class( self.predict_proba(X[0:1]) ) - model = merge_models( - model, + graph = onnx.compose.merge_graphs( + graph, argmax, io_map=[ - (model.graph.output[0].name, "predictions_in"), + 
(graph.output[0].name, "predictions_in"), ], prefix2="classifier_predict_", ) elif predict_function == "predict_raw": - model = self._to_onnx_predict_raw(X) + graph = self._to_onnx_predict_raw(X) # coerce the output shape to be the same as the equivalent predict function test_pred = getattr(self, predict_function)(X[0:1]) - model = merge_models( - model, - self._make_onnx_reshape_predictions(test_pred), - io_map=[(model.graph.output[0].name, "predictions_in")], - prefix2="reshape_", - ) - check_model(model) + graph = reshape_predictions(graph, test_pred) + model = make_model(graph) + onnx.checker.check_model(model, full_check=True) return model diff --git a/legateboost/models/base_model.py b/legateboost/models/base_model.py index 45fac68d..38e37cb7 100644 --- a/legateboost/models/base_model.py +++ b/legateboost/models/base_model.py @@ -127,13 +127,13 @@ def __mul__(self, scalar: Any) -> "BaseModel": def __hash__(self) -> int: return hash(str(self)) - def to_onnx(self, X) -> Any: - """Convert the model to an ONNX model. + def to_onnx(self, X: cn.array) -> Any: + """Convert the model to an ONNX graph. - The implemented ONNX model should accept the following two inputs: + The implemented ONNX graph should accept the following two inputs: - "X_in" : 2D tensor of shape (n_samples, n_features) and type `X_dtype`. - "predictions in" : 2D tensor of shape (n_samples, n_outputs) and type double. - The model should output: + The graph should output: - "predictions out" : 2D tensor of shape (n_samples, n_outputs) and type double. Parameters @@ -146,6 +146,6 @@ def to_onnx(self, X) -> Any: Returns ------- Any - The ONNX model. + The ONNX graph. 
""" raise NotImplementedError diff --git a/legateboost/models/krr.py b/legateboost/models/krr.py index 5ecaaebc..0e1ea61d 100644 --- a/legateboost/models/krr.py +++ b/legateboost/models/krr.py @@ -243,16 +243,13 @@ def __mul__(self, scalar: Any) -> "KRR": self.betas_ *= scalar return new - def to_onnx(self, X) -> Any: + def to_onnx(self, X: cn.array) -> Any: import onnx X_type_text = "double" if X.dtype == cn.float64 else "float" + assert self.sigma is not None, "Has model been trained?" denominator = -2.0 * self.sigma**2 onnx_text = f""" - < - ir_version: 10, - opset_import: ["" : 21] - > KRRModel ({X_type_text}[N, M] X_in, double[N, K] predictions_in) => ({X_type_text}[N, M] X_out, double[N, K] predictions_out) {{ X_out = Identity(X_in) @@ -274,11 +271,11 @@ def to_onnx(self, X) -> Any: predictions_out = Add(dot_double, predictions_in) }} """ # noqa: E501 - model = onnx.parser.parse_model(onnx_text) - model.graph.initializer.extend( + graph = onnx.parser.parse_graph(onnx_text) + graph.initializer.extend( [ onnx.numpy_helper.from_array(self.betas_.__array__(), name="betas"), onnx.numpy_helper.from_array(self.X_train.__array__(), name="X_train"), ] ) - return model + return graph diff --git a/legateboost/models/linear.py b/legateboost/models/linear.py index edb30e61..1319d2de 100644 --- a/legateboost/models/linear.py +++ b/legateboost/models/linear.py @@ -152,15 +152,11 @@ def __mul__(self, scalar: Any) -> "Linear": new.betas_ *= scalar return new - def to_onnx(self, X) -> Any: + def to_onnx(self, X: cn.array) -> Any: import onnx X_type_text = "double" if X.dtype == cn.float64 else "float" onnx_text = f""" - < - ir_version: 10, - opset_import: ["" : 21] - > LinearModel ({X_type_text}[N, M] X_in, double[N, K] predictions_in) => ({X_type_text}[N, M] X_out, double[N, K] predictions_out) {{ X_out = Identity(X_in) @@ -170,8 +166,8 @@ def to_onnx(self, X) -> Any: predictions_out = Add(result_double, predictions_in) }} """ # noqa: E501 - model = 
onnx.parser.parse_model(onnx_text) - model.graph.initializer.extend( + graph = onnx.parser.parse_graph(onnx_text) + graph.initializer.extend( [ onnx.numpy_helper.from_array(self.betas_[1:].__array__(), name="betas"), onnx.numpy_helper.from_array( @@ -179,4 +175,4 @@ def to_onnx(self, X) -> Any: ), ] ) - return model + return graph diff --git a/legateboost/models/nn.py b/legateboost/models/nn.py index d628b6e5..969dc0df 100644 --- a/legateboost/models/nn.py +++ b/legateboost/models/nn.py @@ -182,14 +182,11 @@ def __mul__(self, scalar: Any) -> "NN": new.biases_[-1] *= scalar return new - def to_onnx(self, X) -> Any: + def to_onnx(self, X: cn.array) -> Any: from onnx import TensorProto, numpy_helper - from onnx.checker import check_model from onnx.helper import ( make_graph, - make_model, make_node, - make_opsetid, make_tensor_value_info, np_dtype_to_tensor_dtype, ) @@ -275,11 +272,4 @@ def to_onnx(self, X) -> Any: [X_out, predictions_out], biases + coefficients, ) - onnx_model = make_model( - graph, - opset_imports=[ - make_opsetid("", 21), - ], - ) - check_model(onnx_model) - return onnx_model + return graph diff --git a/legateboost/models/tree.py b/legateboost/models/tree.py index c6711172..cc23d59b 100644 --- a/legateboost/models/tree.py +++ b/legateboost/models/tree.py @@ -316,13 +316,10 @@ def __mul__(self, scalar: Any) -> "Tree": new.leaf_value *= scalar return new - def to_onnx(self, X) -> Any: - import onnx + def to_onnx(self, X: cn.array) -> Any: from onnx import TensorProto, numpy_helper - from onnx.checker import check_model from onnx.helper import ( make_graph, - make_model, make_node, make_tensor_value_info, np_dtype_to_tensor_dtype, @@ -427,13 +424,6 @@ def to_onnx(self, X) -> Any: [X_in, predictions_in], [X_out, predictions_out], [leaf_weights], + # opset_imports=[make_opsetid("ai.onnx.ml", 3), make_opsetid("", 21)], ) - model = make_model( - graph, - opset_imports=[ - onnx.helper.make_opsetid("ai.onnx.ml", 3), - onnx.helper.make_opsetid("", 21), - ], 
- ) - check_model(model) - return model + return graph diff --git a/legateboost/objectives.py b/legateboost/objectives.py index d2607d2a..1c819258 100644 --- a/legateboost/objectives.py +++ b/legateboost/objectives.py @@ -1,5 +1,5 @@ from abc import ABC, abstractmethod -from typing import Tuple +from typing import Any, Tuple from scipy.stats import norm from typing_extensions import TypeAlias, override @@ -72,7 +72,7 @@ def transform(self, pred: cn.ndarray) -> cn.ndarray: return pred def onnx_transform(self, pred: cn.ndarray) -> cn.ndarray: - """Returns an ONNX model that accepts + """Returns an ONNX graph that accepts - "predictions_in" : 2D tensor of shape (n_samples, n_outputs) and type double. And outputs the transformed predictions. - "predictions_out" : arbitrary tensor depending on the objective. @@ -83,21 +83,17 @@ def onnx_transform(self, pred: cn.ndarray) -> cn.ndarray: method for each objective. Returns: - Onnx model that transforms the predictions. + Onnx graph that transforms the predictions. """ import onnx onnx_text = """ - < - ir_version: 10, - opset_import: ["" : 21] - > BaseObjective (double[N, M] predictions_in) => (double[N, M] predictions_out) { predictions_out = Identity(predictions_in) } """ - return onnx.parser.parse_model(onnx_text) + return onnx.parser.parse_graph(onnx_text) @abstractmethod def metric(self) -> BaseMetric: @@ -144,7 +140,7 @@ def output_class(self, pred: cn.ndarray) -> cn.ndarray: """ return cn.argmax(pred, axis=-1) - def onnx_output_class(self, pred: cn.ndarray): + def onnx_output_class(self, pred: cn.ndarray) -> Any: """Returns an ONNX model that accepts - "predictions_in" : 2D tensor of shape (n_samples, n_outputs) and type double. And outputs the predicted class labels. 
@@ -156,16 +152,12 @@ def onnx_output_class(self, pred: cn.ndarray): import onnx onnx_text = """ - < - ir_version: 10, - opset_import: ["" : 21] - > BaseModelOutputClass (double[N, M] predictions_in) => (double[N, M] predictions_out) { predictions_out = ArgMax(predictions_in) } """ # noqa: E501 - return onnx.parser.parse_model(onnx_text) + return onnx.parser.parse_graph(onnx_text) class SquaredErrorObjective(BaseObjective): @@ -299,10 +291,6 @@ def onnx_transform(self, pred: cn.ndarray) -> cn.ndarray: import onnx onnx_text = """ - < - ir_version: 10, - opset_import: ["" : 21] - > NormalObjective (double[N, M] predictions_in) => (double[N, M] predictions_out) { out_shape = Constant() @@ -320,7 +308,7 @@ def onnx_transform(self, pred: cn.ndarray) -> cn.ndarray: predictions_out = Concat(mean, clipped_variance) } """ - return onnx.parser.parse_model(onnx_text) + return onnx.parser.parse_graph(onnx_text) @override def mean(self, param: cn.ndarray) -> cn.ndarray: @@ -405,16 +393,12 @@ def onnx_transform(self, pred: cn.ndarray) -> cn.ndarray: import onnx onnx_text = """ - < - ir_version: 10, - opset_import: ["" : 21] - > GammaDevianceTransform (double[N, M] predictions_in) => (double[N, M] predictions_out) { predictions_out = Exp(predictions_in) } """ # noqa: E501 - return onnx.parser.parse_model(onnx_text) + return onnx.parser.parse_graph(onnx_text) def initialise_prediction( self, y: cn.ndarray, w: cn.ndarray, boost_from_average: bool @@ -465,16 +449,12 @@ def onnx_transform(self, pred: cn.ndarray) -> cn.ndarray: import onnx onnx_text = """ - < - ir_version: 10, - opset_import: ["" : 21] - > GammaTransform (double[N, M] predictions_in) => (double[N, M] predictions_out) { predictions_out = Exp(predictions_in) } """ - return onnx.parser.parse_model(onnx_text) + return onnx.parser.parse_graph(onnx_text) @override def metric(self) -> GammaLLMetric: @@ -627,16 +607,12 @@ def onnx_transform(self, pred: cn.ndarray) -> cn.ndarray: operator_to_use = "Sigmoid" if pred.shape[1] == 
1 else "Softmax" onnx_text = f""" - < - ir_version: 10, - opset_import: ["" : 21] - > LogLossObjective (double[N, M] predictions_in) => (double[N, M] predictions_out) {{ predictions_out = {operator_to_use}(predictions_in) }} """ - return onnx.parser.parse_model(onnx_text) + return onnx.parser.parse_graph(onnx_text) def metric(self) -> LogLossMetric: return LogLossMetric() @@ -678,28 +654,20 @@ def onnx_transform(self, pred: cn.ndarray) -> cn.ndarray: import onnx onnx_text = """ - < - ir_version: 10, - opset_import: ["" : 21] - > MultiLabelObjective (double[N, M] predictions_in) => (double[N, M] predictions_out) { predictions_out = Sigmoid(predictions_in) } """ # noqa: E501 - return onnx.parser.parse_model(onnx_text) + return onnx.parser.parse_graph(onnx_text) def output_class(self, pred: cn.ndarray) -> cn.ndarray: return cn.array(pred > 0.5, dtype=cn.int64) - def onnx_output_class(self, pred: cn.ndarray): + def onnx_output_class(self, pred: cn.ndarray) -> Any: import onnx onnx_text = """ - < - ir_version: 10, - opset_import: ["" : 21] - > MultiLabelOutputClass (double[N, M] predictions_in) => (double[N, M] predictions_out) { half = Constant() @@ -707,7 +675,7 @@ def onnx_output_class(self, pred: cn.ndarray): predictions_out = Cast(greater) } """ # noqa: E501 - return onnx.parser.parse_model(onnx_text) + return onnx.parser.parse_graph(onnx_text) def metric(self) -> MultiLabelMetric: return MultiLabelMetric() @@ -776,35 +744,29 @@ def transform(self, pred: cn.ndarray) -> cn.ndarray: def onnx_transform(self, pred: cn.ndarray) -> cn.ndarray: import onnx + onnx_text = """ + LogLossObjective (double[N, M] predictions_in) => (double[N, M] predictions_out) + """ # noqa: E501 + if pred.shape[1] == 1: - onnx_text = """ - < - ir_version: 10, - opset_import: ["" : 21] - > - LogLossObjective (double[N, M] predictions_in) => (double[N, M] predictions_out) + onnx_text += """ { constant = Constant() a = Mul(predictions_in, constant) predictions_out = Sigmoid(a) } - """ # noqa: 
E501 - return onnx.parser.parse_model(onnx_text) + """ + return onnx.parser.parse_graph(onnx_text) constant = 1 / (pred.shape[1] - 1) - onnx_text_multiclass = f""" - < - ir_version: 10, - opset_import: ["" : 21] - > - LogLossObjective (double[N, M] predictions_in) => (double[N, M] predictions_out) + onnx_text += f""" {{ constant = Constant() a = Mul(predictions_in, constant) predictions_out = Softmax(a) }} """ - return onnx.parser.parse_model(onnx_text_multiclass) + return onnx.parser.parse_graph(onnx_text) def metric(self) -> ExponentialMetric: return ExponentialMetric() diff --git a/legateboost/onnx_utils.py b/legateboost/onnx_utils.py new file mode 100644 index 00000000..c1c3b8f6 --- /dev/null +++ b/legateboost/onnx_utils.py @@ -0,0 +1,104 @@ +from typing import Any, List + +import numpy as np + +import cupynumeric as cn + +# onnx is imported only if needed - keep this a soft dependency +try: + import onnx +except ImportError: + pass + + +def make_model(graph: onnx.GraphProto) -> onnx.ModelProto: + # make model with appropriate opset imports for legate-boost + LEGATEBOOST_ONNX_OPSET_IMPORTS = [ + onnx.helper.make_opsetid("ai.onnx.ml", 3), + onnx.helper.make_opsetid("", 21), + ] + return onnx.helper.make_model(graph, opset_imports=LEGATEBOOST_ONNX_OPSET_IMPORTS) + + +def reshape_predictions(graph: onnx.GraphProto, pred: cn.ndarray) -> onnx.GraphProto: + # àppend an onnx graph that shapes the predictions equivalently to pred + shape = list(pred.shape) + shape[0] = -1 + out_type = "int64" if pred.dtype == cn.int64 else "double" + onnx_text = f""" + ReshapePredictions ({out_type}[N, M] predictions_in) => ({out_type}{shape} predictions_out) + {{ + shape = Constant() + predictions_out = Reshape(predictions_in, shape) + }} + """ # noqa: E501 + reshape_graph = onnx.parser.parse_graph(onnx_text) + graph = onnx.compose.merge_graphs( + graph, + reshape_graph, + io_map=[ + (graph.output[0].name, "predictions_in"), + ], + prefix2="reshape_", + ) + return graph + + +def 
mirror_predict_proba_output(graph: onnx.GraphProto) -> onnx.GraphProto: + # where model outputs only true probability we need to add the false probability + onnx_text = """ + MirrorPredict (double[N, M] predictions_in) => (double[N, 2] predictions_out) + { + one = Constant() + false_probability = Sub(one, predictions_in) + predictions_out = Concat(false_probability, predictions_in) + } + """ # noqa: E501 + new_graph = onnx.parser.parse_graph(onnx_text) + new_graph = onnx.compose.merge_graphs( + graph, + new_graph, + io_map=[ + (graph.output[0].name, "predictions_in"), + ], + prefix2="mirror_", + ) + return new_graph + + +def init_predictions(model_init: cn.array, X_dtype: Any) -> onnx.GraphProto: + # form a graph that takes X_in and model_init as input and outputs + # model_init repeated n_rows times + + X_type_text = "double" if X_dtype == cn.float64 else "float" + onnx_text = f""" + InitPredictions ({X_type_text}[N, M] X_in) => ({X_type_text}[N, M] X_out, double[N, K] predictions_out) + {{ + X_out = Identity(X_in) + n_rows = Shape(X_in) + one = Constant() + tile_repeat = Concat(n_rows, one) + predictions_out = Tile(init, tile_repeat) + }} + """ # noqa: E501 + graph = onnx.parser.parse_graph(onnx_text) + graph.initializer.append( + onnx.numpy_helper.from_array(np.atleast_2d(model_init.__array__()), name="init") + ) + return graph + + +def merge_model_graphs(graphs: List[onnx.GraphProto]) -> onnx.GraphProto: + # merge a list of graphs into a single graph + combined = graphs[0] + for i, g in enumerate(graphs[1:]): + combined = onnx.compose.merge_graphs( + combined, + g, + io_map=[ + (combined.output[0].name, "X_in"), + (combined.output[1].name, "predictions_in"), + ], + prefix2="model_{}_".format(i), + ) + return combined diff --git a/legateboost/test/test_onnx.py b/legateboost/test/test_onnx.py index 89f5a377..07641e9f 100644 --- a/legateboost/test/test_onnx.py +++ b/legateboost/test/test_onnx.py @@ -4,10 +4,12 @@ import cupynumeric as cn import legateboost as lb 
+from legateboost.onnx_utils import make_model def compare_model_predictions(model, X): - sess = ort.InferenceSession(model.to_onnx(X).SerializeToString()) + onnx_model = make_model(model.to_onnx(X)) + sess = ort.InferenceSession(onnx_model.SerializeToString()) feeds = { "X_in": X, } @@ -99,6 +101,12 @@ def regression_dataset(dtype, n_outputs): @pytest.mark.parametrize("n_outputs", [1, 5]) def test_regressor(Model, objective, regression_dataset): X, y = regression_dataset + if ( + Model in [lb.models.KRR, lb.models.NN] + and objective == "gamma" + and X.dtype == np.float32 + ): + pytest.skip("Skipping as numerically unstable") if objective in [ "quantile", "gamma_deviance", From 5dcc2c2cc2ea73d4102e689dd8393da24f9b01d6 Mon Sep 17 00:00:00 2001 From: Rory Mitchell Date: Tue, 22 Apr 2025 05:16:11 -0700 Subject: [PATCH 17/21] Remove onnx type hints --- legateboost/onnx_utils.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/legateboost/onnx_utils.py b/legateboost/onnx_utils.py index c1c3b8f6..3b2f72fb 100644 --- a/legateboost/onnx_utils.py +++ b/legateboost/onnx_utils.py @@ -11,7 +11,7 @@ pass -def make_model(graph: onnx.GraphProto) -> onnx.ModelProto: +def make_model(graph: Any) -> Any: # make model with appropriate opset imports for legate-boost LEGATEBOOST_ONNX_OPSET_IMPORTS = [ onnx.helper.make_opsetid("ai.onnx.ml", 3), @@ -20,7 +20,7 @@ def make_model(graph: onnx.GraphProto) -> onnx.ModelProto: return onnx.helper.make_model(graph, opset_imports=LEGATEBOOST_ONNX_OPSET_IMPORTS) -def reshape_predictions(graph: onnx.GraphProto, pred: cn.ndarray) -> onnx.GraphProto: +def reshape_predictions(graph: Any, pred: cn.ndarray) -> Any: # àppend an onnx graph that shapes the predictions equivalently to pred shape = list(pred.shape) shape[0] = -1 @@ -44,7 +44,7 @@ def reshape_predictions(graph: onnx.GraphProto, pred: cn.ndarray) -> onnx.GraphP return graph -def mirror_predict_proba_output(graph: onnx.GraphProto) -> onnx.GraphProto: +def 
mirror_predict_proba_output(graph: Any) -> Any: # where model outputs only true probability we need to add the false probability onnx_text = """ MirrorPredict (double[N, M] predictions_in) => (double[N, 2] predictions_out) @@ -66,7 +66,7 @@ def mirror_predict_proba_output(graph: onnx.GraphProto) -> onnx.GraphProto: return new_graph -def init_predictions(model_init: cn.array, X_dtype: Any) -> onnx.GraphProto: +def init_predictions(model_init: cn.array, X_dtype: Any) -> Any: # form a graph that takes X_in and model_init as input and outputs # model_init repeated n_rows times @@ -88,7 +88,7 @@ def init_predictions(model_init: cn.array, X_dtype: Any) -> onnx.GraphProto: return graph -def merge_model_graphs(graphs: List[onnx.GraphProto]) -> onnx.GraphProto: +def merge_model_graphs(graphs: List[Any]) -> Any: # merge a list of graphs into a single graph combined = graphs[0] for i, g in enumerate(graphs[1:]): From b9f67f0d8ce059875c9638dd6b8a02d1e5df899b Mon Sep 17 00:00:00 2001 From: Rory Mitchell Date: Wed, 23 Apr 2025 01:29:15 -0700 Subject: [PATCH 18/21] Update doc example --- legateboost/legateboost.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/legateboost/legateboost.py b/legateboost/legateboost.py index 4899ec42..a7e1b50c 100644 --- a/legateboost/legateboost.py +++ b/legateboost/legateboost.py @@ -906,7 +906,7 @@ def to_onnx(self, X: cn.ndarray, predict_function: str = "predict") -> Any: >>> y = np.random.random(X.shape[0]) >>> model = lb.LBRegressor(n_estimators=5).fit(X, y) >>> import onnxruntime as ort - >>> sess = ort.InferenceSession(model.to_onnx(X.dtype).SerializeToString()) + >>> sess = ort.InferenceSession(model.to_onnx(X).SerializeToString()) >>> onnx_pred = sess.run(None, {"X_in": X})[0] >>> assert np.allclose(model.predict(X), onnx_pred, atol=1e-6) >>> @@ -1204,7 +1204,7 @@ def to_onnx(self, X: cn.ndarray, predict_function: str = "predict") -> Any: >>> y = np.random.randint(0, 2, X.shape[0]) >>> model = 
lb.LBClassifier(n_estimators=5).fit(X, y) >>> import onnxruntime as ort - >>> sess = ort.InferenceSession(model.to_onnx(X.dtype, + >>> sess = ort.InferenceSession(model.to_onnx(X, ... predict_function="predict_proba").SerializeToString()) >>> onnx_pred = sess.run(None, {"X_in": X})[0] >>> assert np.allclose(model.predict_proba(X), onnx_pred, atol=1e-6) From f5dd467a51d5f1df46785eef4e2791e6dd87bbcc Mon Sep 17 00:00:00 2001 From: Rory Mitchell Date: Thu, 24 Apr 2025 04:03:11 -0700 Subject: [PATCH 19/21] Make the onnx tree sparse with a recursive builder --- legateboost/models/tree.py | 114 +++++++++++++++++++++++++++---------- 1 file changed, 84 insertions(+), 30 deletions(-) diff --git a/legateboost/models/tree.py b/legateboost/models/tree.py index cc23d59b..5da43355 100644 --- a/legateboost/models/tree.py +++ b/legateboost/models/tree.py @@ -1,9 +1,11 @@ import copy import warnings +from dataclasses import dataclass from enum import IntEnum from typing import Any, Callable, List, Sequence, Union, cast import numpy as np +import numpy.typing as npt import cupynumeric as cn from legate.core import TaskTarget, get_legate_runtime, types @@ -316,6 +318,54 @@ def __mul__(self, scalar: Any) -> "Tree": new.leaf_value *= scalar return new + # copy the tree structure to numpy arrays + # cupynumeric element access is very slow + @dataclass + class TreeAsNumpy: + leaf_value: npt.NDArray[np.float64] + feature: npt.NDArray[np.int32] + split_value: npt.NDArray[np.float64] + gain: npt.NDArray[np.float64] + hessian: npt.NDArray[np.float64] + is_leaf: npt.NDArray[np.bool_] + + # structure of arrays for tree structure expected by onnx + # this container is a convenience to not have 7 function arguments + class OnnxSoa: + def __init__(self, size: int, n_outputs: int) -> None: + self.nodes_modes = np.full(size, "BRANCH_LEQ") + self.nodes_featureids = np.full(size, -1, dtype=np.int32) + self.nodes_truenodeids = np.full(size, -1, dtype=np.int32) + self.nodes_falsenodeids = np.full(size, 
-1, dtype=np.int32) + self.nodes_nodeids = np.arange(size, dtype=np.int32) + self.nodes_values = np.full(size, -1.0, dtype=np.float64) + self.leaf_weights = np.full((size, n_outputs), -1.0, dtype=np.float64) + + def recurse_tree( + self, tree: TreeAsNumpy, soa: OnnxSoa, old_node_idx: int, new_node_idx: int + ) -> int: + # new_node_idx is sparse + if tree.is_leaf[old_node_idx]: + soa.nodes_modes[new_node_idx] = "LEAF" + soa.leaf_weights[new_node_idx] = tree.leaf_value[old_node_idx] + return new_node_idx + else: + soa.nodes_modes[new_node_idx] = "BRANCH_LEQ" + soa.nodes_featureids[new_node_idx] = tree.feature[old_node_idx] + soa.nodes_values[new_node_idx] = tree.split_value[old_node_idx] + left_child_idx = new_node_idx + 1 + soa.nodes_truenodeids[new_node_idx] = left_child_idx + node_idx_counter = self.recurse_tree( + tree, soa, self.left_child(old_node_idx), left_child_idx + ) + right_child_idx = node_idx_counter + 1 + soa.nodes_falsenodeids[new_node_idx] = right_child_idx + node_idx_counter = self.recurse_tree( + tree, soa, self.right_child(old_node_idx), right_child_idx + ) + + return node_idx_counter + def to_onnx(self, X: cn.array) -> Any: from onnx import TensorProto, numpy_helper from onnx.helper import ( @@ -325,32 +375,39 @@ def to_onnx(self, X: cn.array) -> Any: np_dtype_to_tensor_dtype, ) + num_sparse_nodes = (self.hessian[:, 0] > 0.0).sum() + num_outputs = self.leaf_value.shape[1] + # copy the tree as numpy because single element + # access with cupynumeric is very slow + tree = Tree.TreeAsNumpy( + self.leaf_value.__array__(), + self.feature.__array__(), + self.split_value.__array__(), + self.gain.__array__(), + self.hessian.__array__(), + self.feature.__array__() == -1, + ) + soa = Tree.OnnxSoa(num_sparse_nodes, num_outputs) + # This recursive function could become a bottleneck for large trees + # In this case consider implmenting a C++ legate task for this conversion + # Cython could also work + self.recurse_tree(tree, soa, 0, 0) + onnx_nodes = [] - 
num_outputs = self.leaf_value.shape[1] - tree_max_nodes = self.feature.size - all_nodes_idx = np.arange(tree_max_nodes) - nodes_featureids = self.feature.__array__() - nodes_truenodeids = self.left_child(all_nodes_idx) - nodes_falsenodeids = self.right_child(all_nodes_idx) - node_modes = np.full(tree_max_nodes, "BRANCH_LEQ") - node_modes[self.is_leaf(all_nodes_idx)] = "LEAF" - leaf_targetids = np.full(tree_max_nodes, 0, dtype=np.int64) - # predict the leaf node index - # use it to later index into the 2d array of leaf weights - # as ONNX does not support 2d leaf weights - target_weights = all_nodes_idx.astype(np.float32) kwargs = {} # TreeEnsembleRegressor asks us to pass these as tensors when X.dtype is double + # we simply pass a set of indices as leaf weights and then add a node later to + # look up the (vector valued) leaf weights if X.dtype == np.float32: - kwargs["nodes_values"] = self.split_value.__array__() - kwargs["target_weights"] = target_weights + kwargs["nodes_values"] = soa.nodes_values.astype(np.float32) + kwargs["target_weights"] = soa.nodes_nodeids.astype(np.float32) else: kwargs["nodes_values_as_tensor"] = numpy_helper.from_array( - self.split_value.__array__(), name="nodes_values" + soa.nodes_values, name="nodes_values" ) kwargs["target_weights_as_tensor"] = numpy_helper.from_array( - target_weights.astype(np.float64), name="target_weights" + soa.nodes_nodeids.astype(np.float64), name="target_weights" ) # TreeEnsembleRegressor is deprecated, but its successor TreeEnsemble @@ -366,22 +423,20 @@ def to_onnx(self, X: cn.array) -> Any: membership_values=None, nodes_missing_value_tracks_true=None, nodes_hitrates=None, - nodes_modes=node_modes, - nodes_featureids=nodes_featureids, - nodes_truenodeids=nodes_truenodeids, - nodes_falsenodeids=nodes_falsenodeids, - nodes_nodeids=all_nodes_idx, - nodes_treeids=np.zeros(tree_max_nodes, dtype=np.int64), - target_ids=leaf_targetids, - target_nodeids=all_nodes_idx, - target_treeids=np.zeros(tree_max_nodes, 
dtype=np.int64), + nodes_modes=soa.nodes_modes, + nodes_featureids=soa.nodes_featureids, + nodes_truenodeids=soa.nodes_truenodeids, + nodes_falsenodeids=soa.nodes_falsenodeids, + nodes_nodeids=soa.nodes_nodeids, + nodes_treeids=np.zeros_like(soa.nodes_nodeids, dtype=np.int64), + target_ids=np.zeros_like(soa.nodes_nodeids, dtype=np.int64), + target_nodeids=soa.nodes_nodeids, + target_treeids=np.zeros_like(soa.nodes_nodeids, dtype=np.int64), **kwargs, ) ) - leaf_weights = numpy_helper.from_array( - self.leaf_value.__array__(), name="leaf_weights" - ) + leaf_weights = numpy_helper.from_array(soa.leaf_weights, name="leaf_weights") predictions_out = make_tensor_value_info( "predictions_out", TensorProto.DOUBLE, [None, num_outputs] ) @@ -424,6 +479,5 @@ def to_onnx(self, X: cn.array) -> Any: [X_in, predictions_in], [X_out, predictions_out], [leaf_weights], - # opset_imports=[make_opsetid("ai.onnx.ml", 3), make_opsetid("", 21)], ) return graph From 05f8553b59b12362f95a305355d3f7c196f9b267 Mon Sep 17 00:00:00 2001 From: Rory Mitchell Date: Thu, 24 Apr 2025 04:12:15 -0700 Subject: [PATCH 20/21] Address review --- legateboost/models/krr.py | 3 ++- legateboost/objectives.py | 18 +++++++++--------- legateboost/test/test_onnx.py | 2 -- 3 files changed, 11 insertions(+), 12 deletions(-) diff --git a/legateboost/models/krr.py b/legateboost/models/krr.py index 0e1ea61d..0a41788d 100644 --- a/legateboost/models/krr.py +++ b/legateboost/models/krr.py @@ -247,7 +247,8 @@ def to_onnx(self, X: cn.array) -> Any: import onnx X_type_text = "double" if X.dtype == cn.float64 else "float" - assert self.sigma is not None, "Has model been trained?" + if self.sigma is None: + raise ValueError("Model has not been trained. 
Cannot export to ONNX.") denominator = -2.0 * self.sigma**2 onnx_text = f""" KRRModel ({X_type_text}[N, M] X_in, double[N, K] predictions_in) => ({X_type_text}[N, M] X_out, double[N, K] predictions_out) diff --git a/legateboost/objectives.py b/legateboost/objectives.py index 1c819258..08219f29 100644 --- a/legateboost/objectives.py +++ b/legateboost/objectives.py @@ -71,7 +71,7 @@ def transform(self, pred: cn.ndarray) -> cn.ndarray: """ return pred - def onnx_transform(self, pred: cn.ndarray) -> cn.ndarray: + def onnx_transform(self, pred: cn.ndarray) -> Any: """Returns an ONNX graph that accepts - "predictions_in" : 2D tensor of shape (n_samples, n_outputs) and type double. And outputs the transformed predictions. @@ -152,7 +152,7 @@ def onnx_output_class(self, pred: cn.ndarray) -> Any: import onnx onnx_text = """ - BaseModelOutputClass (double[N, M] predictions_in) => (double[N, M] predictions_out) + BaseModelOutputClass (double[N, M] predictions_in) => (int64[N, M] predictions_out) { predictions_out = ArgMax(predictions_in) } @@ -287,7 +287,7 @@ def transform(self, pred: cn.ndarray) -> cn.ndarray: pred[:, :, 1] = cn.clip(pred[:, :, 1], -5, 5) return pred - def onnx_transform(self, pred: cn.ndarray) -> cn.ndarray: + def onnx_transform(self, pred: cn.ndarray) -> Any: import onnx onnx_text = """ @@ -389,7 +389,7 @@ def transform(self, pred: cn.ndarray) -> cn.ndarray: return cn.exp(pred) @override - def onnx_transform(self, pred: cn.ndarray) -> cn.ndarray: + def onnx_transform(self, pred: cn.ndarray) -> Any: import onnx onnx_text = """ @@ -445,7 +445,7 @@ def transform(self, pred: cn.ndarray) -> cn.ndarray: return cn.exp(pred) @override - def onnx_transform(self, pred: cn.ndarray) -> cn.ndarray: + def onnx_transform(self, pred: cn.ndarray) -> Any: import onnx onnx_text = """ @@ -602,7 +602,7 @@ def transform(self, pred: cn.ndarray) -> cn.ndarray: div = cn.sum(e_x, axis=1) return e_x / div[:, cn.newaxis] - def onnx_transform(self, pred: cn.ndarray) -> cn.ndarray: + 
def onnx_transform(self, pred: cn.ndarray) -> Any: import onnx operator_to_use = "Sigmoid" if pred.shape[1] == 1 else "Softmax" @@ -650,7 +650,7 @@ def gradient(self, y: cn.ndarray, pred: cn.ndarray) -> GradPair: def transform(self, pred: cn.ndarray) -> cn.ndarray: return self.one / (self.one + cn.exp(-pred)) - def onnx_transform(self, pred: cn.ndarray) -> cn.ndarray: + def onnx_transform(self, pred: cn.ndarray) -> Any: import onnx onnx_text = """ @@ -668,7 +668,7 @@ def onnx_output_class(self, pred: cn.ndarray) -> Any: import onnx onnx_text = """ - MultiLabelOutputClass (double[N, M] predictions_in) => (double[N, M] predictions_out) + MultiLabelOutputClass (double[N, M] predictions_in) => (int64[N, M] predictions_out) { half = Constant() greater = Greater(predictions_in, half) @@ -741,7 +741,7 @@ def transform(self, pred: cn.ndarray) -> cn.ndarray: K = pred.shape[1] # number of classes return logloss.transform((1 / (K - 1)) * pred) - def onnx_transform(self, pred: cn.ndarray) -> cn.ndarray: + def onnx_transform(self, pred: cn.ndarray) -> Any: import onnx onnx_text = """ diff --git a/legateboost/test/test_onnx.py b/legateboost/test/test_onnx.py index 07641e9f..57e75d75 100644 --- a/legateboost/test/test_onnx.py +++ b/legateboost/test/test_onnx.py @@ -16,9 +16,7 @@ def compare_model_predictions(model, X): pred = model.predict(cn.array(X)) feeds["predictions_in"] = np.zeros((X.shape[0], pred.shape[1])) onnx_pred = sess.run(None, feeds)[1] - onnx_pred = onnx_pred.squeeze() assert onnx_pred.dtype == np.float64 - pred = pred.squeeze() assert pred.shape == onnx_pred.shape assert np.allclose( onnx_pred, pred, atol=1e-2 if X.dtype == np.float32 else 1e-6 From 3faf4f39f54ba732854de61ba2e6df407e85e382 Mon Sep 17 00:00:00 2001 From: Rory Mitchell Date: Fri, 2 May 2025 03:00:06 -0700 Subject: [PATCH 21/21] Some mypy issues --- ci/run_mypy.sh | 1 + legateboost/models/tree.py | 28 +++++++++++++++++++--------- 2 files changed, 20 insertions(+), 9 deletions(-) diff --git 
a/ci/run_mypy.sh b/ci/run_mypy.sh index fdb3e026..d3f818d1 100755 --- a/ci/run_mypy.sh +++ b/ci/run_mypy.sh @@ -14,6 +14,7 @@ set -e -E -u -o pipefail +mypy --version mypy \ --config-file ./pyproject.toml \ --exclude=legateboost/test \ diff --git a/legateboost/models/tree.py b/legateboost/models/tree.py index 5da43355..b5827f13 100644 --- a/legateboost/models/tree.py +++ b/legateboost/models/tree.py @@ -2,7 +2,7 @@ import warnings from dataclasses import dataclass from enum import IntEnum -from typing import Any, Callable, List, Sequence, Union, cast +from typing import Any, Callable, Dict, List, Sequence, Union, cast import numpy as np import numpy.typing as npt @@ -333,13 +333,23 @@ class TreeAsNumpy: # this container is a convenience to not have 7 function arguments class OnnxSoa: def __init__(self, size: int, n_outputs: int) -> None: - self.nodes_modes = np.full(size, "BRANCH_LEQ") - self.nodes_featureids = np.full(size, -1, dtype=np.int32) - self.nodes_truenodeids = np.full(size, -1, dtype=np.int32) - self.nodes_falsenodeids = np.full(size, -1, dtype=np.int32) - self.nodes_nodeids = np.arange(size, dtype=np.int32) - self.nodes_values = np.full(size, -1.0, dtype=np.float64) - self.leaf_weights = np.full((size, n_outputs), -1.0, dtype=np.float64) + self.nodes_modes: npt.NDArray[str] = np.full(size, "BRANCH_LEQ") + self.nodes_featureids: npt.NDArray[np.int32] = np.full( + size, -1, dtype=np.int32 + ) + self.nodes_truenodeids: npt.NDArray[np.int32] = np.full( + size, -1, dtype=np.int32 + ) + self.nodes_falsenodeids: npt.NDArray[np.int32] = np.full( + size, -1, dtype=np.int32 + ) + self.nodes_nodeids: npt.NDArray[np.int32] = np.arange(size, dtype=np.int32) + self.nodes_values: npt.NDArray[np.float64] = np.full( + size, -1.0, dtype=np.float64 + ) + self.leaf_weights: npt.NDArray[np.float64] = np.full( + (size, n_outputs), -1.0, dtype=np.float64 + ) def recurse_tree( self, tree: TreeAsNumpy, soa: OnnxSoa, old_node_idx: int, new_node_idx: int @@ -395,7 +405,7 @@ def 
to_onnx(self, X: cn.array) -> Any: onnx_nodes = [] - kwargs = {} + kwargs: Dict[str, Any] = {} # TreeEnsembleRegressor asks us to pass these as tensors when X.dtype is double # we simply pass a set of indices as leaf weights and then add a node later to # look up the (vector valued) leaf weights