From 7213b3a66208a87832a40f4e302ad7db64d998ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=8B=8F=E6=9D=AD?= Date: Mon, 23 Mar 2026 15:50:17 +0800 Subject: [PATCH] Add CatBoost tabular ML test function --- ..._classifier.CatBoostClassifierFunction.rst | 10 ++ .../test_functions/machine_learning.rst | 1 + docs/source/installation/machine_learning.rst | 24 ++++ pyproject.toml | 1 + .../machine_learning/__init__.py | 3 + .../hyperparameter_optimization/__init__.py | 2 + .../tabular/__init__.py | 2 + .../tabular/classification/__init__.py | 2 + .../classification/test_functions/__init__.py | 2 + .../test_functions/catboost_classifier.py | 113 ++++++++++++++++++ tests/full/smoke/test_catboost_classifier.py | 20 ++++ tests/full/suites/test_ml_catboost.py | 17 +++ 12 files changed, 197 insertions(+) create mode 100644 docs/source/api_reference/test_functions/_autosummary/surfaces.test_functions.machine_learning.hyperparameter_optimization.tabular.classification.test_functions.catboost_classifier.CatBoostClassifierFunction.rst create mode 100644 src/surfaces/test_functions/machine_learning/hyperparameter_optimization/tabular/classification/test_functions/catboost_classifier.py create mode 100644 tests/full/smoke/test_catboost_classifier.py create mode 100644 tests/full/suites/test_ml_catboost.py diff --git a/docs/source/api_reference/test_functions/_autosummary/surfaces.test_functions.machine_learning.hyperparameter_optimization.tabular.classification.test_functions.catboost_classifier.CatBoostClassifierFunction.rst b/docs/source/api_reference/test_functions/_autosummary/surfaces.test_functions.machine_learning.hyperparameter_optimization.tabular.classification.test_functions.catboost_classifier.CatBoostClassifierFunction.rst new file mode 100644 index 0000000..82628f8 --- /dev/null +++ b/docs/source/api_reference/test_functions/_autosummary/surfaces.test_functions.machine_learning.hyperparameter_optimization.tabular.classification.test_functions.catboost_classifier.CatBoostClassifierFunction.rst @@ -0,0 +1,10 @@ +CatBoostClassifierFunction +======================================================================================================================================================================= + +.. currentmodule:: surfaces.test_functions.machine_learning.hyperparameter_optimization.tabular.classification.test_functions.catboost_classifier + +.. autoclass:: CatBoostClassifierFunction + +.. raw:: html + +
diff --git a/docs/source/api_reference/test_functions/machine_learning.rst b/docs/source/api_reference/test_functions/machine_learning.rst index cb765f2..2fb19f0 100644 --- a/docs/source/api_reference/test_functions/machine_learning.rst +++ b/docs/source/api_reference/test_functions/machine_learning.rst @@ -27,6 +27,7 @@ Classification surfaces.test_functions.machine_learning.hyperparameter_optimization.tabular.classification.test_functions.lightgbm_classifier.LightGBMClassifierFunction surfaces.test_functions.machine_learning.hyperparameter_optimization.tabular.classification.test_functions.random_forest_classifier.RandomForestClassifierFunction surfaces.test_functions.machine_learning.hyperparameter_optimization.tabular.classification.test_functions.svm_classifier.SVMClassifierFunction + surfaces.test_functions.machine_learning.hyperparameter_optimization.tabular.classification.test_functions.catboost_classifier.CatBoostClassifierFunction Regression ---------- diff --git a/docs/source/installation/machine_learning.rst b/docs/source/installation/machine_learning.rst index aa33dbf..6121ec0 100644 --- a/docs/source/installation/machine_learning.rst +++ b/docs/source/installation/machine_learning.rst @@ -126,6 +126,30 @@ For XGBoost-based test functions: ---- +CatBoost Support +================ + +For CatBoost-based test functions: + +.. code-block:: bash + + pip install surfaces[ml] catboost + +.. code-block:: python + + from surfaces.test_functions.machine_learning import CatBoostClassifierFunction + + func = CatBoostClassifierFunction() + score = func({ + "iterations": 100, + "depth": 6, + "learning_rate": 0.1, + "l2_leaf_reg": 3, + "random_strength": 1.0, + }) + +---- + Usage Example ============= diff --git a/pyproject.toml b/pyproject.toml index 0b2411d..30040d6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -50,6 +50,7 @@ viz = [ # Machine learning test functions ml = [ "scikit-learn", + "catboost>=1.2.0", "xgboost>=1.7.0", "lightgbm>=4.0.0", ] diff --git a/src/surfaces/test_functions/machine_learning/__init__.py b/src/surfaces/test_functions/machine_learning/__init__.py index 728798e..35de3c9 100644 --- a/src/surfaces/test_functions/machine_learning/__init__.py +++ b/src/surfaces/test_functions/machine_learning/__init__.py @@ -20,6 +20,7 @@ ) from .hyperparameter_optimization import ( # Tabular - Classification + CatBoostClassifierFunction, DecisionTreeClassifierFunction, # Tabular - Regression DecisionTreeRegressorFunction, @@ -52,6 +53,7 @@ __all__ = [ # Tabular - Classification + "CatBoostClassifierFunction", "DecisionTreeClassifierFunction", "GradientBoostingClassifierFunction", "KNeighborsClassifierFunction", @@ -92,6 +94,7 @@ machine_learning_functions = [ # Tabular - Classification + CatBoostClassifierFunction, DecisionTreeClassifierFunction, GradientBoostingClassifierFunction, KNeighborsClassifierFunction, diff --git a/src/surfaces/test_functions/machine_learning/hyperparameter_optimization/__init__.py b/src/surfaces/test_functions/machine_learning/hyperparameter_optimization/__init__.py index c32177d..aed0fc8 100644 --- a/src/surfaces/test_functions/machine_learning/hyperparameter_optimization/__init__.py +++ b/src/surfaces/test_functions/machine_learning/hyperparameter_optimization/__init__.py @@ -25,6 +25,7 @@ ) from .tabular import ( # Classification + CatBoostClassifierFunction, DecisionTreeClassifierFunction, # Regression DecisionTreeRegressorFunction, @@ -51,6 +52,7 @@ __all__ = [ # Tabular - Classification + "CatBoostClassifierFunction", "DecisionTreeClassifierFunction", "GradientBoostingClassifierFunction", "KNeighborsClassifierFunction", diff --git a/src/surfaces/test_functions/machine_learning/hyperparameter_optimization/tabular/__init__.py b/src/surfaces/test_functions/machine_learning/hyperparameter_optimization/tabular/__init__.py index 9d12cb1..6cc84f0 100644 --- a/src/surfaces/test_functions/machine_learning/hyperparameter_optimization/tabular/__init__.py +++ b/src/surfaces/test_functions/machine_learning/hyperparameter_optimization/tabular/__init__.py @@ -3,6 +3,7 @@ # License: MIT License from .classification import ( + CatBoostClassifierFunction, DecisionTreeClassifierFunction, GradientBoostingClassifierFunction, KNeighborsClassifierFunction, @@ -22,6 +23,7 @@ __all__ = [ # Classification + "CatBoostClassifierFunction", "DecisionTreeClassifierFunction", "GradientBoostingClassifierFunction", "KNeighborsClassifierFunction", diff --git a/src/surfaces/test_functions/machine_learning/hyperparameter_optimization/tabular/classification/__init__.py b/src/surfaces/test_functions/machine_learning/hyperparameter_optimization/tabular/classification/__init__.py index 84d56c1..19d293a 100644 --- a/src/surfaces/test_functions/machine_learning/hyperparameter_optimization/tabular/classification/__init__.py +++ b/src/surfaces/test_functions/machine_learning/hyperparameter_optimization/tabular/classification/__init__.py @@ -3,6 +3,7 @@ # License: MIT License from .test_functions import ( + CatBoostClassifierFunction, DecisionTreeClassifierFunction, GradientBoostingClassifierFunction, KNeighborsClassifierFunction, @@ -13,6 +14,7 @@ ) __all__ = [ + "CatBoostClassifierFunction", "DecisionTreeClassifierFunction", "GradientBoostingClassifierFunction", "KNeighborsClassifierFunction", diff --git a/src/surfaces/test_functions/machine_learning/hyperparameter_optimization/tabular/classification/test_functions/__init__.py b/src/surfaces/test_functions/machine_learning/hyperparameter_optimization/tabular/classification/test_functions/__init__.py index d935ca2..d50a953 100644 --- a/src/surfaces/test_functions/machine_learning/hyperparameter_optimization/tabular/classification/test_functions/__init__.py +++ b/src/surfaces/test_functions/machine_learning/hyperparameter_optimization/tabular/classification/test_functions/__init__.py @@ -2,6 +2,7 @@ # Email: simon.blanke@yahoo.com # License: MIT License +from .catboost_classifier import CatBoostClassifierFunction from .decision_tree_classifier import DecisionTreeClassifierFunction from .gradient_boosting_classifier import GradientBoostingClassifierFunction from .k_neighbors_classifier import KNeighborsClassifierFunction @@ -11,6 +12,7 @@ from .xgboost_classifier import XGBoostClassifierFunction __all__ = [ + "CatBoostClassifierFunction", "DecisionTreeClassifierFunction", "GradientBoostingClassifierFunction", "KNeighborsClassifierFunction", diff --git a/src/surfaces/test_functions/machine_learning/hyperparameter_optimization/tabular/classification/test_functions/catboost_classifier.py b/src/surfaces/test_functions/machine_learning/hyperparameter_optimization/tabular/classification/test_functions/catboost_classifier.py new file mode 100644 index 0000000..9f365a4 --- /dev/null +++ b/src/surfaces/test_functions/machine_learning/hyperparameter_optimization/tabular/classification/test_functions/catboost_classifier.py @@ -0,0 +1,113 @@ +"""CatBoost Classifier test function for tabular ML.""" + +from typing import Any, Dict, List, Optional + +import numpy as np + +from surfaces.modifiers import BaseModifier + +from .._base_classification import BaseClassification +from ..datasets import DATASETS + + +class CatBoostClassifierFunction(BaseClassification): + """CatBoost Classifier test function. + + Parameters + ---------- + dataset : str, default="digits" + Dataset to use. One of: "digits", "iris", "wine", "breast_cancer", "covtype". + cv : int, default=5 + Number of cross-validation folds. + use_surrogate : bool, default=False + If True, use pre-trained surrogate for fast evaluation. + """ + + name = "CatBoost Classifier Function" + _name_ = "catboost_classifier" + _dependencies = {"ml": ["catboost"]} + + available_datasets = list(DATASETS.keys()) + available_cv = [2, 3, 5, 10] + + para_names = [ + "iterations", + "depth", + "learning_rate", + "l2_leaf_reg", + "random_strength", + ] + + iterations_default = list(np.arange(50, 300, 25)) + depth_default = list(range(3, 11)) + learning_rate_default = [0.01, 0.03, 0.05, 0.1, 0.2] + l2_leaf_reg_default = [1, 3, 5, 7, 9] + random_strength_default = [0, 0.1, 0.5, 1.0, 2.0] + + latex_formula = r"\text{CV-Accuracy} = f(\text{iterations}, \text{depth}, \text{learning\_rate}, \dots)" + tagline = ( + "Cross-validated accuracy of a CatBoost classifier. " + "Gradient boosting with ordered boosting for categorical-friendly tree learning." + ) + + def __init__( + self, + dataset: str = "digits", + cv: int = 5, + objective: str = "maximize", + modifiers: Optional[List[BaseModifier]] = None, + memory: bool = False, + collect_data: bool = True, + callbacks=None, + catch_errors=None, + use_surrogate: bool = False, + ): + if dataset not in DATASETS: + raise ValueError(f"Unknown dataset '{dataset}'. Available: {self.available_datasets}") + if cv not in self.available_cv: + raise ValueError(f"Invalid cv={cv}. Available: {self.available_cv}") + + self.dataset = dataset + self.cv = cv + self._dataset_loader = DATASETS[dataset] + + super().__init__( + objective=objective, + modifiers=modifiers, + memory=memory, + collect_data=collect_data, + callbacks=callbacks, + catch_errors=catch_errors, + use_surrogate=use_surrogate, + ) + + def _default_search_space(self) -> Dict[str, Any]: + return { + "iterations": self.iterations_default, + "depth": self.depth_default, + "learning_rate": self.learning_rate_default, + "l2_leaf_reg": self.l2_leaf_reg_default, + "random_strength": self.random_strength_default, + } + + def _ml_objective(self, params: Dict[str, Any]) -> float: + from catboost import CatBoostClassifier + from sklearn.model_selection import cross_val_score + + X, y = self._dataset_loader() + clf = CatBoostClassifier( + iterations=params["iterations"], + depth=params["depth"], + learning_rate=params["learning_rate"], + l2_leaf_reg=params["l2_leaf_reg"], + random_strength=params["random_strength"], + random_seed=42, + thread_count=-1, + allow_writing_files=False, + verbose=False, + ) + scores = cross_val_score(clf, X, y, cv=self.cv, scoring="accuracy") + return scores.mean() + + def _get_surrogate_params(self, params: Dict[str, Any]) -> Dict[str, Any]: + return {**params, "dataset": self.dataset, "cv": self.cv} diff --git a/tests/full/smoke/test_catboost_classifier.py b/tests/full/smoke/test_catboost_classifier.py new file mode 100644 index 0000000..b27f8bf --- /dev/null +++ b/tests/full/smoke/test_catboost_classifier.py @@ -0,0 +1,20 @@ +import pytest + +from surfaces.test_functions.machine_learning.hyperparameter_optimization.tabular import ( + CatBoostClassifierFunction, +) + + +@pytest.mark.smoke +@pytest.mark.ml +def test_catboost_classifier_init(): + """Test that CatBoost Classifier instantiates and has a valid search space.""" + + func = CatBoostClassifierFunction(dataset="digits", cv=2) + space = func.search_space + config = {k: v[0] if isinstance(v, list) else v for k, v in space.items()} + score = func._ml_objective(config) + + assert func is not None + assert isinstance(score, float) + assert 0.0 <= score <= 1.0 diff --git a/tests/full/suites/test_ml_catboost.py b/tests/full/suites/test_ml_catboost.py new file mode 100644 index 0000000..89bbade --- /dev/null +++ b/tests/full/suites/test_ml_catboost.py @@ -0,0 +1,17 @@ +import numpy as np +import pytest + +from tests.conftest import get_sample_params + + +@pytest.mark.ml +def test_catboost_classifier(quick_ml_params): + """CatBoostClassifier evaluates correctly.""" + from surfaces.test_functions.machine_learning import CatBoostClassifierFunction + + func = CatBoostClassifierFunction() + params = {**get_sample_params(func), **quick_ml_params} + result = func(params) + + assert isinstance(result, (int, float)) + assert np.isfinite(result)