Skip to content

AutoGluon

An AutoML model using AutoGluon's TabularPredictor for automated model selection and ensembling.

Overview

  • Type: Tabular
  • Library: AutoGluon
  • Registry name: autogluon
  • Class: AutoGluonModel

Note

Requires the autogluon optional extra: pip install -e ".[autogluon]"

Configuration

# configs/models/autogluon.yaml
model_type: autogluon
preset: good_quality
time_limit: null
models_dir: autogluon_pitchtype_models

| Parameter | Default | Description |
| --- | --- | --- |
| `preset` | `good_quality` | AutoGluon quality preset |
| `time_limit` | `null` | Max training time in seconds (`null` = no limit) |
| `models_dir` | `autogluon_pitchtype_models` | Directory for AutoGluon model artifacts |

Usage

from pitch_sequencing import get_model

model = get_model("autogluon", {"preset": "good_quality"})
model.fit(X_train, y_train)

predictions = model.predict(X_test)
probabilities = model.predict_proba(X_test)

API Reference

pitch_sequencing.models.autogluon_model.AutoGluonModel

Bases: BaseModel

AutoGluon TabularPredictor for tabular pitch data.

Source code in src/pitch_sequencing/models/autogluon_model.py
class AutoGluonModel(BaseModel):
    """AutoGluon TabularPredictor for tabular pitch data.

    Configuration keys (all optional):
        preset: AutoGluon quality preset passed to ``fit`` as ``presets``
            (default ``"good_quality"``).
        time_limit: Maximum training time forwarded to AutoGluon, or ``None``
            to leave it unset (default ``None``).
        models_dir: Directory handed to ``TabularPredictor`` as ``path``
            (default ``"autogluon_pitchtype_models"``).
    """

    def __init__(self, config=None):
        """Read settings from ``config``; a missing or empty config uses defaults."""
        config = config or {}
        self.preset = config.get("preset", "good_quality")
        self.time_limit = config.get("time_limit", None)
        self.models_dir = config.get("models_dir", "autogluon_pitchtype_models")
        self._predictor = None  # set by fit()
        self._label = None  # name of the target column used during fit()

    @property
    def name(self) -> str:
        """Human-readable model name."""
        return "AutoGluon"

    @property
    def model_type(self) -> str:
        """Model family identifier; this model consumes tabular features."""
        return "tabular"

    def _require_predictor(self):
        """Return the fitted predictor, or raise if ``fit`` has not been called."""
        if self._predictor is None:
            raise RuntimeError(
                "AutoGluonModel has not been fitted; call fit() before predicting."
            )
        return self._predictor

    def fit(self, X_train, y_train, X_val=None, y_val=None, **kwargs):
        """Train a ``TabularPredictor`` on the given features and target.

        Args:
            X_train: Training features; anything ``pd.DataFrame`` accepts.
            y_train: Training target. If it has a non-``None`` ``name`` that
                name becomes the label column, otherwise ``"target"`` is used.
            X_val, y_val: Accepted for interface compatibility; currently unused.
            **kwargs: Accepted for interface compatibility; currently unused.
        """
        from autogluon.tabular import TabularDataset, TabularPredictor

        # An unnamed pandas Series has ``name`` set to None, so an attribute
        # existence check is not enough to pick a usable label.
        label = getattr(y_train, "name", None)
        self._label = "target" if label is None else label

        # Copy so the caller's frame is never mutated by adding the label column.
        train_df = pd.DataFrame(X_train).copy()
        # Use raw values (when available) to avoid index alignment against X_train.
        train_df[self._label] = y_train.values if hasattr(y_train, "values") else y_train
        train_data = TabularDataset(train_df)

        fit_kwargs = {"presets": self.preset}
        if self.time_limit is not None:
            fit_kwargs["time_limit"] = self.time_limit

        self._predictor = TabularPredictor(
            label=self._label, path=self.models_dir
        ).fit(train_data, **fit_kwargs)

    def predict(self, X) -> np.ndarray:
        """Return predicted labels for ``X`` as a NumPy array.

        Raises:
            RuntimeError: If the model has not been fitted.
        """
        predictor = self._require_predictor()

        from autogluon.tabular import TabularDataset

        test_df = pd.DataFrame(X)
        # to_numpy() guarantees an ndarray even for extension dtypes.
        return predictor.predict(TabularDataset(test_df)).to_numpy()

    def predict_proba(self, X) -> np.ndarray:
        """Return the predictor's class probabilities for ``X`` as a NumPy array.

        Raises:
            RuntimeError: If the model has not been fitted.
        """
        predictor = self._require_predictor()

        from autogluon.tabular import TabularDataset

        test_df = pd.DataFrame(X)
        proba = predictor.predict_proba(TabularDataset(test_df))
        return proba.to_numpy()

    def get_params(self) -> dict:
        """Return the configuration values this model was constructed with."""
        return {
            "preset": self.preset,
            "time_limit": self.time_limit,
            "models_dir": self.models_dir,
        }