From b6c87c8a2d2829227208c7b638b96dc96ddb5a98 Mon Sep 17 00:00:00 2001 From: Guan-Ting Date: Fri, 11 Jul 2025 11:32:04 +0000 Subject: [PATCH 1/7] Add probability estimation function and parameter to be arguments for tree prediction --- libmultilabel/linear/tree.py | 39 +++++++++++++++++++++++++++++------- linear_trainer.py | 2 ++ main.py | 15 ++++++++++++++ 3 files changed, 49 insertions(+), 7 deletions(-) diff --git a/libmultilabel/linear/tree.py b/libmultilabel/linear/tree.py index fe6e94b4..4868b8d6 100644 --- a/libmultilabel/linear/tree.py +++ b/libmultilabel/linear/tree.py @@ -6,6 +6,7 @@ import scipy.sparse as sparse import sklearn.cluster import sklearn.preprocessing +from scipy.special import log_expit from tqdm import tqdm import psutil @@ -54,21 +55,42 @@ def __init__( self.node_ptr = node_ptr self.multiclass = False self._model_separated = False # Indicates whether the model has been separated for pruning tree. + self.estimator = self.sigmoid_A + def exp_L2(self, x, A): + return np.square(np.maximum(0, 1 - x)) + + def exp_L1(self, x, A): + return np.maximum(0, 1 - x) + + def sigmoid_A(self, x, A): + return log_expit(A * x) + def predict_values( self, x: sparse.csr_matrix, beam_width: int = 10, + estimation_function: str = "sigmoid_A", + estimation_parameter: int = 3, ) -> np.ndarray: """Calculate the probability estimates associated with x. Args: x (sparse.csr_matrix): A matrix with dimension number of instances * number of features. beam_width (int, optional): Number of candidates considered during beam search. Defaults to 10. + estimation_function (str, optional): The probability estimation function used in beamsearch. Default function is sigmoid-A. + estimation_parameter (int, optional): The extra parameter of probability estimation function if needed. Default value is 3. Returns: np.ndarray: A matrix with dimension number of instances * number of classes. """ + if estimation_function == "exp-L1": + self.estimator = self.exp_L1 + elif estimation_function == "exp-L2": + self.estimator = self.exp_L2 + elif estimation_function == "sigmoid_A": + self.estimator = self.sigmoid_A + if beam_width >= len(self.root.children): # Beam_width is sufficiently large; pruning not applied. # Calculates decision values for all nodes. @@ -78,8 +100,8 @@ def predict_values( if not self._model_separated: self._separate_model_for_pruning_tree() self._model_separated = True - all_preds = self._prune_tree_and_predict_values(x, beam_width) # number of instances * (number of labels + total number of metalabels) - return np.vstack([self._beam_search(all_preds[i], beam_width) for i in range(all_preds.shape[0])]) + all_preds = self._prune_tree_and_predict_values(x, beam_width, estimation_parameter) # number of instances * (number of labels + total number of metalabels) + return np.vstack([self._beam_search(all_preds[i], beam_width, estimation_parameter) for i in range(all_preds.shape[0])]) def _separate_model_for_pruning_tree(self): """ @@ -110,7 +132,7 @@ def _separate_model_for_pruning_tree(self): ) self.subtree_models.append(subtree_flatmodel) - def _prune_tree_and_predict_values(self, x: sparse.csr_matrix, beam_width: int) -> np.ndarray: + def _prune_tree_and_predict_values(self, x: sparse.csr_matrix, beam_width: int, estimation_parameter: int) -> np.ndarray: """Calculates the selective decision values associated with instances x by evaluating only the most relevant subtrees. 
Only subtrees corresponding to the top beam_width candidates from the root are evaluated, @@ -119,6 +141,7 @@ def _prune_tree_and_predict_values(self, x: sparse.csr_matrix, beam_width: int) Args: x (sparse.csr_matrix): A matrix with dimension number of instances * number of features. beam_width (int): Number of top candidate branches considered for prediction. + estimation_parameter (int): The extra parameter of probability estimation function if needed. Returns: np.ndarray: A matrix with dimension number of instances * (number of labels + total number of metalabels). @@ -129,7 +152,8 @@ def _prune_tree_and_predict_values(self, x: sparse.csr_matrix, beam_width: int) # Calculate root decision values and scores root_preds = linear.predict_values(self.root_model, x) - children_scores = 0.0 - np.square(np.maximum(0, 1 - root_preds)) + print(estimation_parameter) + children_scores = 0.0 - self.estimator(root_preds, estimation_parameter) slice = np.s_[:, self.node_ptr[self.root.index] : self.node_ptr[self.root.index + 1]] all_preds[slice] = root_preds @@ -156,12 +180,13 @@ def _prune_tree_and_predict_values(self, x: sparse.csr_matrix, beam_width: int) return all_preds - def _beam_search(self, instance_preds: np.ndarray, beam_width: int) -> np.ndarray: + def _beam_search(self, instance_preds: np.ndarray, beam_width: int, estimation_parameter:int) -> np.ndarray: """Predict with beam search using cached probability estimates for a single instance. Args: instance_preds (np.ndarray): A vector of cached probability estimates of each node, has dimension number of labels + total number of metalabels. beam_width (int): Number of candidates considered. + estimation_parameter (int): The extra parameter of probability estimation function if needed. Returns: np.ndarray: A vector with dimension number of classes. 
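
The hunks above introduce three candidate estimators, and the hunk below applies one inside the beam search. As a minimal standalone sketch of what they compute (module-level functions with toy inputs; the unused A argument of the two hinge variants is dropped for brevity):

import numpy as np
from scipy.special import log_expit

def exp_L1(x):
    # Hinge loss max(0, 1 - x): zero once the decision value clears the margin.
    return np.maximum(0, 1 - x)

def exp_L2(x):
    # Squared hinge loss: the penalty the tree model used before this patch.
    return np.square(np.maximum(0, 1 - x))

def sigmoid_A(x, A=3.0):
    # Negative log-sigmoid of the scaled decision value, i.e. the negative
    # log-probability of following this edge. The version in the hunk above
    # returns log_expit(A * x) without the minus sign; since the beam search
    # subtracts the estimator from the running score, PATCH 2/7 below adds
    # the negation.
    return -log_expit(A * x)

x = np.array([-1.0, 0.0, 2.0])
print(exp_L1(x))     # [2. 1. 0.]
print(exp_L2(x))     # [4. 1. 0.]
print(sigmoid_A(x))  # [3.0486 0.6931 0.0025]

Each function is meant to act as a non-negative per-edge loss that shrinks as the decision value grows, so that subtracting it rewards confident branches.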
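For intuition about the scores being cached and summed here: each edge contributes -estimator(pred) to the running score, so exponentiating at the leaves (as the final hunk of this file does) recovers a product of per-edge sigmoid probabilities along the root-to-leaf path. A toy two-level check, assuming the corrected -log_expit(A * x) convention from PATCH 2/7:

import numpy as np
from scipy.special import expit, log_expit

A = 3.0
root_preds = np.array([1.2, -0.4])   # toy decision values for the root's children
child_preds = np.array([0.8, -2.0])  # toy decision values under the first child

# Each edge contributes -estimator(pred) = log_expit(A * pred) to the score.
root_scores = 0.0 - (-log_expit(A * root_preds))
leaf_scores = root_scores[0] - (-log_expit(A * child_preds))

# exp(sum of log-probabilities) equals the product of sigmoids on the path.
print(np.exp(leaf_scores))                                # [0.8924 0.0024]
print(expit(A * root_preds[0]) * expit(A * child_preds))  # identical
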
@@ -179,7 +204,7 @@ def _beam_search(self, instance_preds: np.ndarray, beam_width: int) -> np.ndarra continue slice = np.s_[self.node_ptr[node.index] : self.node_ptr[node.index + 1]] pred = instance_preds[slice] - children_score = score - np.square(np.maximum(0, 1 - pred)) + children_score = score - self.estimator(pred, estimation_parameter) next_level.extend(zip(node.children, children_score.tolist())) cur_level = sorted(next_level, key=lambda pair: -pair[1])[:beam_width] @@ -190,7 +215,7 @@ def _beam_search(self, instance_preds: np.ndarray, beam_width: int) -> np.ndarra for node, score in cur_level: slice = np.s_[self.node_ptr[node.index] : self.node_ptr[node.index + 1]] pred = instance_preds[slice] - scores[node.label_map] = np.exp(score - np.square(np.maximum(0, 1 - pred))) + scores[node.label_map] = np.exp(score - self.estimator(pred, estimation_parameter)) return scores diff --git a/linear_trainer.py b/linear_trainer.py index b0524ee7..2805da1a 100644 --- a/linear_trainer.py +++ b/linear_trainer.py @@ -23,6 +23,8 @@ def linear_test(config, model, datasets, label_mapping): predict_kwargs = {} if model.name == "tree": predict_kwargs["beam_width"] = config.beam_width + predict_kwargs["estimation_function"] = config.estimation_function + predict_kwargs["estimation_parameter"] = config.estimation_parameter for i in tqdm(range(ceil(num_instance / config.eval_batch_size))): slice = np.s_[i * config.eval_batch_size : (i + 1) * config.eval_batch_size] diff --git a/main.py b/main.py index 12564f6b..ad981004 100644 --- a/main.py +++ b/main.py @@ -229,6 +229,21 @@ def add_all_arguments(parser): default=10, help="The width of the beam search (default: %(default)s)", ) + parser.add_argument( + "--estimation_function", + type=str, + default="sigmoid_A", + choices=["exp-L1", "exp-L2", "sigmoid_A"], + help="The function that estimates probability in beam search (default: %(default)s)" + ) + + parser.add_argument( + "--estimation_parameter", + type=float, + default=3, + help="The parameter that probability estimation function may need (default: %(default)s)" + ) + # AttentionXML parser.add_argument( "--cluster_size", From 0554c8ba69abac99dcef7a98372da55d21b67173 Mon Sep 17 00:00:00 2001 From: Guan-Ting Date: Fri, 11 Jul 2025 11:35:11 +0000 Subject: [PATCH 2/7] fix sigmoid function --- libmultilabel/linear/tree.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libmultilabel/linear/tree.py b/libmultilabel/linear/tree.py index 4868b8d6..6b8aa8d7 100644 --- a/libmultilabel/linear/tree.py +++ b/libmultilabel/linear/tree.py @@ -64,7 +64,7 @@ def exp_L1(self, x, A): return np.maximum(0, 1 - x) def sigmoid_A(self, x, A): - return log_expit(A * x) + return -log_expit(A * x) def predict_values( self, From e77e9c3f2fe06baacd0bffaa7ea64823b70ce4d3 Mon Sep 17 00:00:00 2001 From: Guan-Ting Date: Fri, 11 Jul 2025 13:10:54 +0000 Subject: [PATCH 3/7] Save probability estimator parameter in class structure --- libmultilabel/linear/tree.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/libmultilabel/linear/tree.py b/libmultilabel/linear/tree.py index 6b8aa8d7..15ac57a2 100644 --- a/libmultilabel/linear/tree.py +++ b/libmultilabel/linear/tree.py @@ -56,15 +56,16 @@ def __init__( self.multiclass = False self._model_separated = False # Indicates whether the model has been separated for pruning tree. 
self.estimator = self.sigmoid_A + self.estimator_parameter = 3 - def exp_L2(self, x, A): + def exp_L2(self, x): return np.square(np.maximum(0, 1 - x)) - def exp_L1(self, x, A): + def exp_L1(self, x): return np.maximum(0, 1 - x) - def sigmoid_A(self, x, A): - return -log_expit(A * x) + def sigmoid_A(self, x): + return -log_expit(self.estimator_parameter * x) def predict_values( self, @@ -91,6 +92,8 @@ def predict_values( elif estimation_function == "sigmoid_A": self.estimator = self.sigmoid_A + self.estimator_parameter = estimation_parameter + if beam_width >= len(self.root.children): # Beam_width is sufficiently large; pruning not applied. # Calculates decision values for all nodes. @@ -100,8 +103,8 @@ def predict_values( if not self._model_separated: self._separate_model_for_pruning_tree() self._model_separated = True - all_preds = self._prune_tree_and_predict_values(x, beam_width, estimation_parameter) # number of instances * (number of labels + total number of metalabels) - return np.vstack([self._beam_search(all_preds[i], beam_width, estimation_parameter) for i in range(all_preds.shape[0])]) + all_preds = self._prune_tree_and_predict_values(x, beam_width) # number of instances * (number of labels + total number of metalabels) + return np.vstack([self._beam_search(all_preds[i], beam_width) for i in range(all_preds.shape[0])]) def _separate_model_for_pruning_tree(self): """ @@ -132,7 +135,7 @@ def _separate_model_for_pruning_tree(self): ) self.subtree_models.append(subtree_flatmodel) - def _prune_tree_and_predict_values(self, x: sparse.csr_matrix, beam_width: int, estimation_parameter: int) -> np.ndarray: + def _prune_tree_and_predict_values(self, x: sparse.csr_matrix, beam_width: int) -> np.ndarray: """Calculates the selective decision values associated with instances x by evaluating only the most relevant subtrees. Only subtrees corresponding to the top beam_width candidates from the root are evaluated, @@ -141,7 +144,6 @@ def _prune_tree_and_predict_values(self, x: sparse.csr_matrix, beam_width: int, Args: x (sparse.csr_matrix): A matrix with dimension number of instances * number of features. beam_width (int): Number of top candidate branches considered for prediction. - estimation_parameter (int): The extra parameter of probability estimation function if needed. Returns: np.ndarray: A matrix with dimension number of instances * (number of labels + total number of metalabels). @@ -152,8 +154,7 @@ def _prune_tree_and_predict_values(self, x: sparse.csr_matrix, beam_width: int, # Calculate root decision values and scores root_preds = linear.predict_values(self.root_model, x) - print(estimation_parameter) - children_scores = 0.0 - self.estimator(root_preds, estimation_parameter) + children_scores = 0.0 - self.estimator(root_preds) slice = np.s_[:, self.node_ptr[self.root.index] : self.node_ptr[self.root.index + 1]] all_preds[slice] = root_preds @@ -180,13 +181,12 @@ def _prune_tree_and_predict_values(self, x: sparse.csr_matrix, beam_width: int, return all_preds - def _beam_search(self, instance_preds: np.ndarray, beam_width: int, estimation_parameter:int) -> np.ndarray: + def _beam_search(self, instance_preds: np.ndarray, beam_width: int) -> np.ndarray: """Predict with beam search using cached probability estimates for a single instance. Args: instance_preds (np.ndarray): A vector of cached probability estimates of each node, has dimension number of labels + total number of metalabels. beam_width (int): Number of candidates considered. 
- estimation_parameter (int): The extra parameter of probability estimation function if needed. Returns: np.ndarray: A vector with dimension number of classes. @@ -204,7 +204,7 @@ def _beam_search(self, instance_preds: np.ndarray, beam_width: int, estimation_p continue slice = np.s_[self.node_ptr[node.index] : self.node_ptr[node.index + 1]] pred = instance_preds[slice] - children_score = score - self.estimator(pred, estimation_parameter) + children_score = score - self.estimator(pred) next_level.extend(zip(node.children, children_score.tolist())) cur_level = sorted(next_level, key=lambda pair: -pair[1])[:beam_width] @@ -215,7 +215,7 @@ def _beam_search(self, instance_preds: np.ndarray, beam_width: int, estimation_p for node, score in cur_level: slice = np.s_[self.node_ptr[node.index] : self.node_ptr[node.index + 1]] pred = instance_preds[slice] - scores[node.label_map] = np.exp(score - self.estimator(pred, estimation_parameter)) + scores[node.label_map] = np.exp(score - self.estimator(pred)) return scores From 5808ec04c71bb1f5ed3d2edaae96e35b21fe72a8 Mon Sep 17 00:00:00 2001 From: Guan-Ting Date: Thu, 17 Jul 2025 08:01:49 +0000 Subject: [PATCH 4/7] Remove unnecessary function and revise corresponding notes. --- libmultilabel/linear/tree.py | 25 +++++-------------------- linear_trainer.py | 1 - main.py | 9 +-------- 3 files changed, 6 insertions(+), 29 deletions(-) diff --git a/libmultilabel/linear/tree.py b/libmultilabel/linear/tree.py index 15ac57a2..a3a7aeda 100644 --- a/libmultilabel/linear/tree.py +++ b/libmultilabel/linear/tree.py @@ -55,23 +55,15 @@ def __init__( self.node_ptr = node_ptr self.multiclass = False self._model_separated = False # Indicates whether the model has been separated for pruning tree. - self.estimator = self.sigmoid_A self.estimator_parameter = 3 - def exp_L2(self, x): - return np.square(np.maximum(0, 1 - x)) - - def exp_L1(self, x): - return np.maximum(0, 1 - x) - def sigmoid_A(self, x): - return -log_expit(self.estimator_parameter * x) + return log_expit(self.estimator_parameter * x) def predict_values( self, x: sparse.csr_matrix, beam_width: int = 10, - estimation_function: str = "sigmoid_A", estimation_parameter: int = 3, ) -> np.ndarray: """Calculate the probability estimates associated with x. @@ -79,18 +71,11 @@ def predict_values( Args: x (sparse.csr_matrix): A matrix with dimension number of instances * number of features. beam_width (int, optional): Number of candidates considered during beam search. Defaults to 10. - estimation_function (str, optional): The probability estimation function used in beamsearch. Default function is sigmoid-A. - estimation_parameter (int, optional): The extra parameter of probability estimation function if needed. Default value is 3. + estimation_parameter (int, optional): The tunable parameter of probability estimation function, that is sigmoid(estimation_parameter * preds). Returns: np.ndarray: A matrix with dimension number of instances * number of classes. 
""" - if estimation_function == "exp-L1": - self.estimator = self.exp_L1 - elif estimation_function == "exp-L2": - self.estimator = self.exp_L2 - elif estimation_function == "sigmoid_A": - self.estimator = self.sigmoid_A self.estimator_parameter = estimation_parameter @@ -154,7 +139,7 @@ def _prune_tree_and_predict_values(self, x: sparse.csr_matrix, beam_width: int) # Calculate root decision values and scores root_preds = linear.predict_values(self.root_model, x) - children_scores = 0.0 - self.estimator(root_preds) + children_scores = 0.0 + self.sigmoid_A(root_preds) slice = np.s_[:, self.node_ptr[self.root.index] : self.node_ptr[self.root.index + 1]] all_preds[slice] = root_preds @@ -204,7 +189,7 @@ def _beam_search(self, instance_preds: np.ndarray, beam_width: int) -> np.ndarra continue slice = np.s_[self.node_ptr[node.index] : self.node_ptr[node.index + 1]] pred = instance_preds[slice] - children_score = score - self.estimator(pred) + children_score = score + self.sigmoid_A(pred) next_level.extend(zip(node.children, children_score.tolist())) cur_level = sorted(next_level, key=lambda pair: -pair[1])[:beam_width] @@ -215,7 +200,7 @@ def _beam_search(self, instance_preds: np.ndarray, beam_width: int) -> np.ndarra for node, score in cur_level: slice = np.s_[self.node_ptr[node.index] : self.node_ptr[node.index + 1]] pred = instance_preds[slice] - scores[node.label_map] = np.exp(score - self.estimator(pred)) + scores[node.label_map] = np.exp(score + self.sigmoid_A(pred)) return scores diff --git a/linear_trainer.py b/linear_trainer.py index 2805da1a..637aa2f9 100644 --- a/linear_trainer.py +++ b/linear_trainer.py @@ -23,7 +23,6 @@ def linear_test(config, model, datasets, label_mapping): predict_kwargs = {} if model.name == "tree": predict_kwargs["beam_width"] = config.beam_width - predict_kwargs["estimation_function"] = config.estimation_function predict_kwargs["estimation_parameter"] = config.estimation_parameter for i in tqdm(range(ceil(num_instance / config.eval_batch_size))): diff --git a/main.py b/main.py index ad981004..0f296ebe 100644 --- a/main.py +++ b/main.py @@ -229,19 +229,12 @@ def add_all_arguments(parser): default=10, help="The width of the beam search (default: %(default)s)", ) - parser.add_argument( - "--estimation_function", - type=str, - default="sigmoid_A", - choices=["exp-L1", "exp-L2", "sigmoid_A"], - help="The function that estimates probability in beam search (default: %(default)s)" - ) parser.add_argument( "--estimation_parameter", type=float, default=3, - help="The parameter that probability estimation function may need (default: %(default)s)" + help="The parameter for probability estimation function (default: %(default)s)" ) # AttentionXML From bcf81c2825d0c59c550ecbf7a7bf301e6b167e26 Mon Sep 17 00:00:00 2001 From: Guan-Ting Date: Wed, 13 Aug 2025 09:47:14 +0000 Subject: [PATCH 5/7] update cv --- libmultilabel/linear/tree.py | 53 ++++++++++++++++++++++++++++++++++-- main.py | 1 - 2 files changed, 50 insertions(+), 4 deletions(-) diff --git a/libmultilabel/linear/tree.py b/libmultilabel/linear/tree.py index a3a7aeda..cb52ef53 100644 --- a/libmultilabel/linear/tree.py +++ b/libmultilabel/linear/tree.py @@ -11,6 +11,7 @@ import psutil from . import linear +from . import metrics __all__ = ["train_tree", "TreeModel"] @@ -64,7 +65,6 @@ def predict_values( self, x: sparse.csr_matrix, beam_width: int = 10, - estimation_parameter: int = 3, ) -> np.ndarray: """Calculate the probability estimates associated with x. 
@@ -77,8 +77,6 @@ def predict_values( np.ndarray: A matrix with dimension number of instances * number of classes. """ - self.estimator_parameter = estimation_parameter - if beam_width >= len(self.root.children): # Beam_width is sufficiently large; pruning not applied. # Calculates decision values for all nodes. @@ -203,6 +201,55 @@ def _beam_search(self, instance_preds: np.ndarray, beam_width: int) -> np.ndarra scores[node.label_map] = np.exp(score + self.sigmoid_A(pred)) return scores + def tuning_A_by_cross_validation( + self, + y: sparse.csr_matrix, + x: sparse.csr_matrix, + n_folds: int, + batch_size: int, + beamwidth: int, + metric: list, + A_candidates: list, + options: str = "", + K=100, + dmax=10, + ): + data_splits = [] + for n in range(n_folds): + start = np.ceil(n/n_folds*x.shape[0]).astype(int) + end = np.ceil((n+1)/n_folds*x.shape[0]).astype(int) + data_splits.append({'x':x[start:end, :], 'y':y[start:end ,:]}) + + score = {m:{A:0 for A in A_candidates} for m in metric} + for n in range(n_folds): + data_y = sparse.vstack([data_splits[j]["y"] for j in range(n_folds) if j != n]) + data_x = sparse.vstack([data_splits[j]["x"] for j in range(n_folds) if j != n]) + + model = train_tree( + data_y, + data_x, + options, + K, + dmax, + ) + + for A in A_candidates: + model.estimator_parameter = A + + num_instances = data_splits[n]["x"].shape[0] + num_batch = np.ceil(num_instances/batch_size).astype(int) + metric_eval = metrics.get_metrics(metric ,num_classes = data_y.shape[1]) + for i in range(num_batch): + valid_x = data_splits[n]["x"][i * batch_size : (i+1) * batch_size] + valid_y = data_splits[n]["y"][i * batch_size : (i+1) * batch_size] + preds = model.predict_values(valid_x, beam_width=beamwidth) + metric_eval.update(preds, valid_y) + + eval = metric_eval.compute() + for k in eval.keys(): + score[k][A] += eval[k] + + self.estimator_parameter = max(score[k], key=score[k].get) def train_tree( y: sparse.csr_matrix, diff --git a/main.py b/main.py index 0f296ebe..19469b2f 100644 --- a/main.py +++ b/main.py @@ -229,7 +229,6 @@ def add_all_arguments(parser): default=10, help="The width of the beam search (default: %(default)s)", ) - parser.add_argument( "--estimation_parameter", type=float, From 47339e43ba9379f513682858fe127a4ba389c251 Mon Sep 17 00:00:00 2001 From: Guan-Ting Date: Tue, 19 Aug 2025 19:54:56 +0000 Subject: [PATCH 6/7] remove command line change --- linear_trainer.py | 1 - main.py | 6 ------ 2 files changed, 7 deletions(-) diff --git a/linear_trainer.py b/linear_trainer.py index 637aa2f9..b0524ee7 100644 --- a/linear_trainer.py +++ b/linear_trainer.py @@ -23,7 +23,6 @@ def linear_test(config, model, datasets, label_mapping): predict_kwargs = {} if model.name == "tree": predict_kwargs["beam_width"] = config.beam_width - predict_kwargs["estimation_parameter"] = config.estimation_parameter for i in tqdm(range(ceil(num_instance / config.eval_batch_size))): slice = np.s_[i * config.eval_batch_size : (i + 1) * config.eval_batch_size] diff --git a/main.py b/main.py index 120767aa..bfcc8688 100644 --- a/main.py +++ b/main.py @@ -229,12 +229,6 @@ def add_all_arguments(parser): default=10, help="The width of the beam search (default: %(default)s)", ) - parser.add_argument( - "--estimation_parameter", - type=float, - default=3, - help="The parameter for probability estimation function (default: %(default)s)" - ) # AttentionXML parser.add_argument( From acdf0c6e5b74de5536be6759179ebc965d780477 Mon Sep 17 00:00:00 2001 From: Guan-Ting Date: Tue, 19 Aug 2025 19:55:51 +0000 Subject: [PATCH 
7/7] remove extra blank line --- main.py | 1 - 1 file changed, 1 deletion(-) diff --git a/main.py b/main.py index bfcc8688..3a1aa98c 100644 --- a/main.py +++ b/main.py @@ -229,7 +229,6 @@ def add_all_arguments(parser): default=10, help="The width of the beam search (default: %(default)s)", ) - # AttentionXML parser.add_argument( "--cluster_size",
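
Taken together with the removal of the command-line flags in PATCH 6/7, the estimation parameter is now tuned programmatically rather than passed at prediction time. A hypothetical end-to-end sketch of the resulting workflow (the data-loading lines follow LibMultiLabel's documented linear API; the dataset path, fold and batch sizes, metrics, and A_candidates are placeholder choices, not values from this series):

import libmultilabel.linear as linear

datasets = linear.load_dataset("txt", "data/eur-lex/train.txt", "data/eur-lex/test.txt")
preprocessor = linear.Preprocessor()
datasets = preprocessor.fit_transform(datasets)

# Train the tree model, then tune A on the training set by cross-validation.
model = linear.train_tree(datasets["train"]["y"], datasets["train"]["x"])
model.tuning_A_by_cross_validation(
    datasets["train"]["y"],
    datasets["train"]["x"],
    n_folds=3,
    batch_size=256,
    beamwidth=10,
    metric=["P@1", "P@5"],
    A_candidates=[1, 2, 3, 5, 10],
)

# The best A is stored on the model as self.estimator_parameter and is used
# by subsequent predictions.
preds = model.predict_values(datasets["test"]["x"], beam_width=10)

Note that tuning_A_by_cross_validation retrains one tree per fold, so a call with n_folds=3 costs roughly three extra train_tree runs on top of the initial training.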