Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
CatBoostClassifierFunction
=======================================================================================================================================================================

.. currentmodule:: surfaces.test_functions.machine_learning.hyperparameter_optimization.tabular.classification.test_functions.catboost_classifier

.. autoclass:: CatBoostClassifierFunction

.. raw:: html

<div class="clearer"></div>
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ Classification
surfaces.test_functions.machine_learning.hyperparameter_optimization.tabular.classification.test_functions.lightgbm_classifier.LightGBMClassifierFunction
surfaces.test_functions.machine_learning.hyperparameter_optimization.tabular.classification.test_functions.random_forest_classifier.RandomForestClassifierFunction
surfaces.test_functions.machine_learning.hyperparameter_optimization.tabular.classification.test_functions.svm_classifier.SVMClassifierFunction
surfaces.test_functions.machine_learning.hyperparameter_optimization.tabular.classification.test_functions.catboost_classifier.CatBoostClassifierFunction

Regression
----------
Expand Down
24 changes: 24 additions & 0 deletions docs/source/installation/machine_learning.rst
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,30 @@ For XGBoost-based test functions:

----

CatBoost Support
================

For CatBoost-based test functions:

.. code-block:: bash

pip install surfaces[ml] catboost

.. code-block:: python

from surfaces.test_functions.machine_learning import CatBoostClassifierFunction

func = CatBoostClassifierFunction()
score = func({
"iterations": 100,
"depth": 6,
"learning_rate": 0.1,
"l2_leaf_reg": 3,
"random_strength": 1.0,
})

----

Usage Example
=============

Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ viz = [
# Machine learning test functions
ml = [
"scikit-learn",
"catboost>=1.2.0",
"xgboost>=1.7.0",
"lightgbm>=4.0.0",
]
Expand Down
3 changes: 3 additions & 0 deletions src/surfaces/test_functions/machine_learning/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
)
from .hyperparameter_optimization import (
# Tabular - Classification
CatBoostClassifierFunction,
DecisionTreeClassifierFunction,
# Tabular - Regression
DecisionTreeRegressorFunction,
Expand Down Expand Up @@ -52,6 +53,7 @@

__all__ = [
# Tabular - Classification
"CatBoostClassifierFunction",
"DecisionTreeClassifierFunction",
"GradientBoostingClassifierFunction",
"KNeighborsClassifierFunction",
Expand Down Expand Up @@ -92,6 +94,7 @@

machine_learning_functions = [
# Tabular - Classification
CatBoostClassifierFunction,
DecisionTreeClassifierFunction,
GradientBoostingClassifierFunction,
KNeighborsClassifierFunction,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
)
from .tabular import (
# Classification
CatBoostClassifierFunction,
DecisionTreeClassifierFunction,
# Regression
DecisionTreeRegressorFunction,
Expand All @@ -51,6 +52,7 @@

__all__ = [
# Tabular - Classification
"CatBoostClassifierFunction",
"DecisionTreeClassifierFunction",
"GradientBoostingClassifierFunction",
"KNeighborsClassifierFunction",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
# License: MIT License

from .classification import (
CatBoostClassifierFunction,
DecisionTreeClassifierFunction,
GradientBoostingClassifierFunction,
KNeighborsClassifierFunction,
Expand All @@ -22,6 +23,7 @@

__all__ = [
# Classification
"CatBoostClassifierFunction",
"DecisionTreeClassifierFunction",
"GradientBoostingClassifierFunction",
"KNeighborsClassifierFunction",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
# License: MIT License

from .test_functions import (
CatBoostClassifierFunction,
DecisionTreeClassifierFunction,
GradientBoostingClassifierFunction,
KNeighborsClassifierFunction,
Expand All @@ -13,6 +14,7 @@
)

__all__ = [
"CatBoostClassifierFunction",
"DecisionTreeClassifierFunction",
"GradientBoostingClassifierFunction",
"KNeighborsClassifierFunction",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# Email: simon.blanke@yahoo.com
# License: MIT License

from .catboost_classifier import CatBoostClassifierFunction
from .decision_tree_classifier import DecisionTreeClassifierFunction
from .gradient_boosting_classifier import GradientBoostingClassifierFunction
from .k_neighbors_classifier import KNeighborsClassifierFunction
Expand All @@ -11,6 +12,7 @@
from .xgboost_classifier import XGBoostClassifierFunction

__all__ = [
"CatBoostClassifierFunction",
"DecisionTreeClassifierFunction",
"GradientBoostingClassifierFunction",
"KNeighborsClassifierFunction",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
"""CatBoost Classifier test function for tabular ML."""

from typing import Any, Dict, List, Optional

import numpy as np

from surfaces.modifiers import BaseModifier

from .._base_classification import BaseClassification
from ..datasets import DATASETS


class CatBoostClassifierFunction(BaseClassification):
    """CatBoost Classifier test function.

    The objective value is the mean cross-validated accuracy of a
    ``catboost.CatBoostClassifier`` fitted on the selected dataset with the
    hyperparameters under evaluation.

    Parameters
    ----------
    dataset : str, default="digits"
        Dataset to use. One of: "digits", "iris", "wine", "breast_cancer", "covtype".
        Must be a key of ``DATASETS`` (see ``available_datasets``).
    cv : int, default=5
        Number of cross-validation folds. Must be one of ``available_cv``.
    objective : str, default="maximize"
        Forwarded unchanged to the parent constructor.
    modifiers : list of BaseModifier, optional
        Forwarded unchanged to the parent constructor.
    memory : bool, default=False
        Forwarded unchanged to the parent constructor.
    collect_data : bool, default=True
        Forwarded unchanged to the parent constructor.
    callbacks : optional
        Forwarded unchanged to the parent constructor.
    catch_errors : optional
        Forwarded unchanged to the parent constructor.
    use_surrogate : bool, default=False
        If True, use pre-trained surrogate for fast evaluation.

    Raises
    ------
    ValueError
        If ``dataset`` is not a key of ``DATASETS`` or ``cv`` is not in
        ``available_cv``.
    """

    name = "CatBoost Classifier Function"
    _name_ = "catboost_classifier"
    _dependencies = {"ml": ["catboost"]}

    available_datasets = list(DATASETS.keys())
    available_cv = [2, 3, 5, 10]

    para_names = [
        "iterations",
        "depth",
        "learning_rate",
        "l2_leaf_reg",
        "random_strength",
    ]

    # Default grids hold plain Python numbers. ``.tolist()`` converts the
    # NumPy int64 scalars from ``np.arange`` into built-in ints so the search
    # space stays JSON-serializable and passes ``isinstance(v, int)`` checks.
    iterations_default = np.arange(50, 300, 25).tolist()
    depth_default = list(range(3, 11))
    learning_rate_default = [0.01, 0.03, 0.05, 0.1, 0.2]
    l2_leaf_reg_default = [1, 3, 5, 7, 9]
    random_strength_default = [0, 0.1, 0.5, 1.0, 2.0]

    latex_formula = r"\text{CV-Accuracy} = f(\text{iterations}, \text{depth}, \text{learning\_rate}, \dots)"
    tagline = (
        "Cross-validated accuracy of a CatBoost classifier. "
        "Gradient boosting with ordered boosting for categorical-friendly tree learning."
    )

    def __init__(
        self,
        dataset: str = "digits",
        cv: int = 5,
        objective: str = "maximize",
        modifiers: Optional[List[BaseModifier]] = None,
        memory: bool = False,
        collect_data: bool = True,
        callbacks=None,
        catch_errors=None,
        use_surrogate: bool = False,
    ):
        # Validate before touching any state so a bad argument leaves no
        # half-initialised instance behind.
        if dataset not in DATASETS:
            raise ValueError(f"Unknown dataset '{dataset}'. Available: {self.available_datasets}")
        if cv not in self.available_cv:
            raise ValueError(f"Invalid cv={cv}. Available: {self.available_cv}")

        # These attributes are assigned before the parent constructor runs.
        self.dataset = dataset
        self.cv = cv
        self._dataset_loader = DATASETS[dataset]

        super().__init__(
            objective=objective,
            modifiers=modifiers,
            memory=memory,
            collect_data=collect_data,
            callbacks=callbacks,
            catch_errors=catch_errors,
            use_surrogate=use_surrogate,
        )

    def _default_search_space(self) -> Dict[str, Any]:
        """Return the default grid of candidate values for each hyperparameter."""
        return {
            "iterations": self.iterations_default,
            "depth": self.depth_default,
            "learning_rate": self.learning_rate_default,
            "l2_leaf_reg": self.l2_leaf_reg_default,
            "random_strength": self.random_strength_default,
        }

    def _ml_objective(self, params: Dict[str, Any]) -> float:
        """Return the mean cross-validated accuracy for ``params``.

        Parameters
        ----------
        params : dict
            Must contain every key listed in ``para_names``.

        Returns
        -------
        float
            Mean of the per-fold accuracy scores, as a built-in ``float``.
        """
        # Imported lazily so the module can be imported without the optional
        # catboost / scikit-learn dependencies installed.
        from catboost import CatBoostClassifier
        from sklearn.model_selection import cross_val_score

        X, y = self._dataset_loader()
        clf = CatBoostClassifier(
            iterations=params["iterations"],
            depth=params["depth"],
            learning_rate=params["learning_rate"],
            l2_leaf_reg=params["l2_leaf_reg"],
            random_strength=params["random_strength"],
            random_seed=42,  # fixed seed for every evaluation
            thread_count=-1,
            allow_writing_files=False,
            verbose=False,
        )
        scores = cross_val_score(clf, X, y, cv=self.cv, scoring="accuracy")
        # ``scores.mean()`` is a NumPy scalar; convert it to match the
        # annotated return type.
        return float(scores.mean())

    def _get_surrogate_params(self, params: Dict[str, Any]) -> Dict[str, Any]:
        """Return ``params`` extended with this instance's ``dataset`` and ``cv``."""
        return {**params, "dataset": self.dataset, "cv": self.cv}
20 changes: 20 additions & 0 deletions tests/full/smoke/test_catboost_classifier.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import pytest

from surfaces.test_functions.machine_learning.hyperparameter_optimization.tabular import (
CatBoostClassifierFunction,
)


@pytest.mark.smoke
@pytest.mark.ml
def test_catboost_classifier_init():
    """Smoke-check construction and one direct objective evaluation.

    Builds the function on "digits" with 2 folds, takes the first candidate of
    every list-valued search-space entry, and scores that configuration.
    """
    func = CatBoostClassifierFunction(dataset="digits", cv=2)

    # Pick the first candidate per hyperparameter; non-list entries pass through.
    first_choice = {}
    for key, candidates in func.search_space.items():
        if isinstance(candidates, list):
            first_choice[key] = candidates[0]
        else:
            first_choice[key] = candidates

    score = func._ml_objective(first_choice)

    assert func is not None
    assert isinstance(score, float)
    assert 0.0 <= score <= 1.0
17 changes: 17 additions & 0 deletions tests/full/suites/test_ml_catboost.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import numpy as np
import pytest

from tests.conftest import get_sample_params


@pytest.mark.ml
def test_catboost_classifier(quick_ml_params):
    """Calling the CatBoost classifier function yields a finite numeric score."""
    from surfaces.test_functions.machine_learning import CatBoostClassifierFunction

    func = CatBoostClassifierFunction()

    # Sampled parameters first, so the fixture's quick settings take precedence.
    sampled = get_sample_params(func)
    params = {**sampled, **quick_ml_params}

    result = func(params)

    assert isinstance(result, (int, float))
    assert np.isfinite(result)