Skip to content

Baseball Pitch Sequence Prediction

AutoGluon

jman4162/Baseball-Pitch-Sequence-Prediction

AutoGluon Model¶

AutoML model using AutoGluon TabularPredictor.

`pitch_sequencing.models.autogluon_model` ¶

AutoGluon TabularPredictor wrapper.

`AutoGluonModel` ¶

Bases: BaseModel

AutoGluon TabularPredictor for tabular pitch data.

Source code in src/pitch_sequencing/models/autogluon_model.py

class AutoGluonModel(BaseModel):
    """AutoGluon TabularPredictor for tabular pitch data.

    Recognised ``config`` keys (all optional):

    * ``preset`` -- forwarded to ``TabularPredictor.fit`` as ``presets``
      (default ``"good_quality"``).
    * ``time_limit`` -- forwarded to ``TabularPredictor.fit`` only when it is
      not ``None`` (default ``None``).
    * ``models_dir`` -- passed to ``TabularPredictor`` as ``path``
      (default ``"autogluon_pitchtype_models"``).
    """

    def __init__(self, config=None):
        config = config or {}
        self.preset = config.get("preset", "good_quality")
        self.time_limit = config.get("time_limit", None)
        self.models_dir = config.get("models_dir", "autogluon_pitchtype_models")
        # Both are populated by fit(); None means "not fitted yet".
        self._predictor = None
        self._label = None

    @property
    def name(self) -> str:
        """Return the display name of this model."""
        return "AutoGluon"

    @property
    def model_type(self) -> str:
        """Return the model family identifier (``"tabular"``)."""
        return "tabular"

    def _require_fitted(self):
        """Return the trained predictor, or raise if ``fit`` has not run."""
        if self._predictor is None:
            raise RuntimeError(
                "AutoGluonModel has not been fitted; call fit() before predicting."
            )
        return self._predictor

    def fit(self, X_train, y_train, X_val=None, y_val=None, **kwargs):
        """Train a ``TabularPredictor`` on the given features and labels.

        Args:
            X_train: Training features; wrapped with ``pd.DataFrame``.
            y_train: Training labels. Its ``name`` attribute, when present and
                not ``None``, is used as the label column name; otherwise the
                column is called ``"target"``.
            X_val: Accepted for interface compatibility; not used here.
            y_val: Accepted for interface compatibility; not used here.
            **kwargs: Accepted for interface compatibility; not used here.
        """
        from autogluon.tabular import TabularDataset, TabularPredictor

        # An unnamed pandas Series *has* a ``name`` attribute whose value is
        # None, so checking for the attribute alone is not enough.
        label = getattr(y_train, "name", None)
        self._label = "target" if label is None else label

        train_df = pd.DataFrame(X_train).copy()
        # Assign raw values so a differing index on y_train cannot misalign
        # the labels against the feature rows.
        train_df[self._label] = y_train.values if hasattr(y_train, "values") else y_train
        train_data = TabularDataset(train_df)

        fit_kwargs = {"presets": self.preset}
        if self.time_limit is not None:
            fit_kwargs["time_limit"] = self.time_limit

        self._predictor = TabularPredictor(
            label=self._label, path=self.models_dir
        ).fit(train_data, **fit_kwargs)

    def predict(self, X) -> np.ndarray:
        """Return the predictor's label predictions for ``X`` as an array.

        Raises:
            RuntimeError: If called before ``fit``.
        """
        from autogluon.tabular import TabularDataset

        predictor = self._require_fitted()
        test_df = pd.DataFrame(X)
        return predictor.predict(TabularDataset(test_df)).values

    def predict_proba(self, X) -> np.ndarray:
        """Return the predictor's ``predict_proba`` output for ``X`` as an array.

        Raises:
            RuntimeError: If called before ``fit``.
        """
        from autogluon.tabular import TabularDataset

        predictor = self._require_fitted()
        test_df = pd.DataFrame(X)
        proba = predictor.predict_proba(TabularDataset(test_df))
        return proba.values

    def get_params(self) -> dict:
        """Return the ``preset`` and ``time_limit`` settings as a dict."""
        return {"preset": self.preset, "time_limit": self.time_limit}