From 856e6c49bb53094160a75bf5828a1044ef7988c1 Mon Sep 17 00:00:00 2001
From: Arthur <arthur.lcte@gmail.com>
Date: Mon, 30 Mar 2026 18:26:20 +0200
Subject: [PATCH 1/2] Trust internal scikit-learn types needed for GB/HGB
 models

---
 skops/io/_sklearn.py           | 113 +++++++++++++++++++++++++-
 skops/io/tests/test_persist.py | 141 +++++++++++++++++++++++++++++++++
 2 files changed, 253 insertions(+), 1 deletion(-)

diff --git a/skops/io/_sklearn.py b/skops/io/_sklearn.py
index 3baee2bb..fab17fb9 100644
--- a/skops/io/_sklearn.py
+++ b/skops/io/_sklearn.py
@@ -6,7 +6,7 @@
 from sklearn.tree._tree import Tree
 
 from ._audit import Node, get_tree
-from ._general import TypeNode, unsupported_get_state
+from ._general import ObjectNode, TypeNode, object_get_state, unsupported_get_state
 from ._protocol import PROTOCOL
 from ._utils import LoadContext, SaveContext, get_module, get_state, gettype
 from .exceptions import UnsupportedTypeException
@@ -97,9 +97,81 @@
     LossFunction = None
 
 
+SKLEARN_INTERNAL_OBJECTS: set[type] = set()
+SKLEARN_TYPE_NAME_OVERRIDES: dict[type, str] = {}
+
+try:
+    from sklearn._loss.link import (
+        HalfLogitLink,
+        IdentityLink,
+        Interval,
+        LogitLink,
+        LogLink,
+        MultinomialLogit,
+    )
+
+    SKLEARN_INTERNAL_OBJECTS |= {
+        HalfLogitLink,
+        IdentityLink,
+        Interval,
+        LogLink,
+        LogitLink,
+        MultinomialLogit,
+    }
+except ImportError:
+    pass
+
+try:
+    from sklearn._loss.loss import (
+        AbsoluteError,
+        ExponentialLoss,
+        HalfBinomialLoss,
+        HalfGammaLoss,
+        HalfMultinomialLoss,
+        HalfPoissonLoss,
+        HalfSquaredError,
+        HuberLoss,
+        PinballLoss,
+    )
+
+    SKLEARN_INTERNAL_OBJECTS |= {
+        AbsoluteError,
+        ExponentialLoss,
+        HalfBinomialLoss,
+        HalfGammaLoss,
+        HalfMultinomialLoss,
+        HalfPoissonLoss,
+        HalfSquaredError,
+        HuberLoss,
+        PinballLoss,
+    }
+except ImportError:
+    pass
+
+if "CyHalfMultinomialLoss" in globals():
+    SKLEARN_INTERNAL_OBJECTS.add(CyHalfMultinomialLoss)
+    SKLEARN_TYPE_NAME_OVERRIDES[CyHalfMultinomialLoss] = (
+        "sklearn._loss._loss.CyHalfMultinomialLoss"
+    )
+
+try:
+    from sklearn.ensemble._hist_gradient_boosting.binning import _BinMapper
+    from sklearn.ensemble._hist_gradient_boosting.predictor import TreePredictor
+
+    SKLEARN_INTERNAL_OBJECTS |= {_BinMapper, TreePredictor}
+except ImportError:
+    pass
+
+
 UNSUPPORTED_TYPES = {Birch}
 
 
+def get_sklearn_internal_type_name(type_: type) -> str:
+    return SKLEARN_TYPE_NAME_OVERRIDES.get(
+        type_, get_module(type_) + "." + type_.__name__
+    )
+
+
 def reduce_get_state(obj: Any, save_context: SaveContext) -> dict[str, Any]:
     # This method is for objects for which we have to use the __reduce__
     # method to get the state.
@@ -260,6 +332,41 @@ def __init__(
         )
 
 
+def sklearn_internal_object_get_state(
+    obj: Any, save_context: SaveContext
+) -> dict[str, Any]:
+    state = object_get_state(obj, save_context)
+    module_name, _, class_name = get_sklearn_internal_type_name(type(obj)).rpartition(
+        "."
+    )
+    state["__module__"] = module_name
+    state["__class__"] = class_name
+    state["__loader__"] = "SklearnInternalObjectNode"
+    return state
+
+
+class SklearnInternalObjectNode(ObjectNode):
+    def __init__(
+        self,
+        state: dict[str, Any],
+        load_context: LoadContext,
+        trusted: Optional[Sequence[str]] = None,
+    ) -> None:
+        super().__init__(state, load_context, trusted)
+        trusted_sklearn_internals = [
+            get_sklearn_internal_type_name(type_) for type_ in SKLEARN_INTERNAL_OBJECTS
+        ]
+        trusted_sklearn_internals = [
+            type_name
+            for type_name in trusted_sklearn_internals
+            if type_name.startswith("sklearn.")
+        ]
+        self.trusted = self._get_trusted(
+            trusted,
+            default=trusted_sklearn_internals,
+        )
+
+
 # TODO: remove once support for sklearn<1.2 is dropped.
 def _DictWithDeprecatedKeys_get_state(
     obj: Any, save_context: SaveContext
@@ -321,12 +428,16 @@ def _construct(self):
 if CyLossFunction is not None:
     GET_STATE_DISPATCH_FUNCTIONS.append((CyLossFunction, loss_get_state))
 
+for type_ in SKLEARN_INTERNAL_OBJECTS:
+    GET_STATE_DISPATCH_FUNCTIONS.append((type_, sklearn_internal_object_get_state))
+
 for type_ in UNSUPPORTED_TYPES:
     GET_STATE_DISPATCH_FUNCTIONS.append((type_, unsupported_get_state))
 
 # tuples of type and function that creates the instance of that type
 NODE_TYPE_MAPPING: dict[tuple[str, int], Any] = {
     ("LossNode", PROTOCOL): LossNode,
+    ("SklearnInternalObjectNode", PROTOCOL): SklearnInternalObjectNode,
     ("TreeNode", PROTOCOL): TreeNode,
 }
 
diff --git a/skops/io/tests/test_persist.py b/skops/io/tests/test_persist.py
index c20e0311..d80dc140 100644
--- a/skops/io/tests/test_persist.py
+++ b/skops/io/tests/test_persist.py
@@ -20,6 +20,12 @@
 from sklearn.datasets import load_sample_images, make_classification, make_regression
 from sklearn.decomposition import SparseCoder
 from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
+from sklearn.ensemble import (
+    GradientBoostingClassifier,
+    GradientBoostingRegressor,
+    HistGradientBoostingClassifier,
+    HistGradientBoostingRegressor,
+)
 from sklearn.exceptions import SkipTestWarning
 from sklearn.experimental import enable_halving_search_cv  # noqa
 from sklearn.feature_extraction.text import TfidfVectorizer
@@ -438,6 +444,141 @@ def test_can_trust_types(type_):
     assert len(untrusted_types) == 0
 
 
+@pytest.mark.parametrize(
+    ("estimator", "problem_type"),
+    [
+        pytest.param(
+            GradientBoostingClassifier(loss="log_loss", n_estimators=5),
+            "multiclass",
+            id="GradientBoostingClassifier-log_loss-multiclass",
+        ),
+        pytest.param(
+            GradientBoostingClassifier(loss="exponential", n_estimators=5),
+            "binary",
+            id="GradientBoostingClassifier-exponential",
+        ),
+        pytest.param(
+            GradientBoostingRegressor(loss="squared_error", n_estimators=5),
+            "regression",
+            id="GradientBoostingRegressor-squared_error",
+        ),
+        pytest.param(
+            GradientBoostingRegressor(loss="absolute_error", n_estimators=5),
+            "regression",
+            id="GradientBoostingRegressor-absolute_error",
+        ),
+        pytest.param(
+            GradientBoostingRegressor(loss="huber", n_estimators=5),
+            "regression",
+            id="GradientBoostingRegressor-huber",
+        ),
+        pytest.param(
+            GradientBoostingRegressor(loss="quantile", n_estimators=5, alpha=0.8),
+            "regression",
+            id="GradientBoostingRegressor-quantile",
+        ),
+        pytest.param(
+            HistGradientBoostingClassifier(loss="log_loss", max_iter=5),
+            "binary",
+            id="HistGradientBoostingClassifier-log_loss",
+        ),
+        pytest.param(
+            HistGradientBoostingRegressor(loss="gamma", max_iter=5),
+            "positive_regression",
+            id="HistGradientBoostingRegressor-gamma",
+        ),
+        pytest.param(
+            HistGradientBoostingRegressor(loss="poisson", max_iter=5),
+            "positive_regression",
+            id="HistGradientBoostingRegressor-poisson",
+        ),
+    ],
+)
+def test_gradient_boosting_estimators_have_no_untrusted_types(estimator, problem_type):
+    set_random_state(estimator, random_state=0)
+
+    if problem_type == "binary":
+        X, y = make_classification(
+            n_samples=N_SAMPLES,
+            n_features=N_FEATURES,
+            n_classes=2,
+            n_informative=5,
+            random_state=0,
+        )
+    elif problem_type == "multiclass":
+        X, y = make_classification(
+            n_samples=140,
+            n_features=N_FEATURES,
+            n_classes=3,
+            n_informative=8,
+            n_clusters_per_class=1,
+            random_state=0,
+        )
+    elif problem_type == "positive_regression":
+        X, y = make_regression(
+            n_samples=N_SAMPLES,
+            n_features=N_FEATURES,
+            random_state=0,
+        )
+        y = np.abs(y) + 1
+    else:
+        X, y = make_regression(
+            n_samples=N_SAMPLES,
+            n_features=N_FEATURES,
+            random_state=0,
+        )
+
+    with warnings.catch_warnings():
+        warnings.filterwarnings("ignore", module="sklearn")
+        estimator.fit(X, y)
+
+    dumped = dumps(estimator)
+
+    assert get_untrusted_types(data=dumped) == []
+
+    loaded = loads(dumped)
+    assert_method_outputs_equal(estimator, loaded, X)
+
+
+def test_cyhalfmultinomialloss_is_serialized_under_sklearn_module():
+    estimator = GradientBoostingClassifier(loss="log_loss", n_estimators=5)
+    set_random_state(estimator, random_state=0)
+    X, y = make_classification(
+        n_samples=140,
+        n_features=N_FEATURES,
+        n_classes=3,
+        n_informative=8,
+        n_clusters_per_class=1,
+        random_state=0,
+    )
+
+    with warnings.catch_warnings():
+        warnings.filterwarnings("ignore", module="sklearn")
+        estimator.fit(X, y)
+
+    dumped = dumps(estimator)
+    with ZipFile(io.BytesIO(dumped), "r") as zip_file:
+        schema = json.loads(zip_file.read("schema.json"))
+
+    found = []
+
+    def walk(obj):
+        if isinstance(obj, dict):
+            if obj.get("__class__") == "CyHalfMultinomialLoss":
+                found.append(obj)
+            for value in obj.values():
+                walk(value)
+        elif isinstance(obj, list):
+            for value in obj:
+                walk(value)
+
+    walk(schema)
+
+    assert len(found) == 1
+    assert found[0]["__module__"] == "sklearn._loss._loss"
+    assert found[0]["__loader__"] == "SklearnInternalObjectNode"
+
+
 @pytest.mark.parametrize(
     "estimator", _unsupported_estimators(), ids=_get_check_estimator_ids
 )

From 68ae65529a89f8146a4b505360811b41aeb415df Mon Sep 17 00:00:00 2001
From: Arthur <arthur.lcte@gmail.com>
Date: Mon, 30 Mar 2026 19:06:13 +0200
Subject: [PATCH 2/2] clean-up

---
 skops/io/_sklearn.py | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/skops/io/_sklearn.py b/skops/io/_sklearn.py
index fab17fb9..a82cd4dd 100644
--- a/skops/io/_sklearn.py
+++ b/skops/io/_sklearn.py
@@ -172,6 +172,19 @@ def get_sklearn_internal_type_name(type_: type) -> str:
     )
 
 
+TRUSTED_SKLEARN_INTERNAL_TYPE_NAMES = [
+    get_sklearn_internal_type_name(type_) for type_ in SKLEARN_INTERNAL_OBJECTS
+]
+
+if not all(
+    type_name.startswith("sklearn.")
+    for type_name in TRUSTED_SKLEARN_INTERNAL_TYPE_NAMES
+):
+    raise RuntimeError(
+        "All trusted sklearn internal type names must start with 'sklearn.'."
+    )
+
+
 def reduce_get_state(obj: Any, save_context: SaveContext) -> dict[str, Any]:
     # This method is for objects for which we have to use the __reduce__
     # method to get the state.
@@ -353,17 +366,9 @@ def __init__(
         trusted: Optional[Sequence[str]] = None,
     ) -> None:
         super().__init__(state, load_context, trusted)
-        trusted_sklearn_internals = [
-            get_sklearn_internal_type_name(type_) for type_ in SKLEARN_INTERNAL_OBJECTS
-        ]
-        trusted_sklearn_internals = [
-            type_name
-            for type_name in trusted_sklearn_internals
-            if type_name.startswith("sklearn.")
-        ]
         self.trusted = self._get_trusted(
             trusted,
-            default=trusted_sklearn_internals,
+            default=TRUSTED_SKLEARN_INTERNAL_TYPE_NAMES,
         )