Hidden Markov Model (HMM)
A sequence model using hmmlearn's CategoricalHMM to capture latent pitch states.
Overview
- Type: Sequence
- Library: hmmlearn
- Registry name: hmm
- Class: HMMModel
Note
Requires the hmm optional extra: pip install -e ".[hmm]"
Configuration
# configs/models/hmm.yaml
model_type: hmm
min_components: 1
max_components: 8
n_iter: 100
| Parameter | Default | Description |
|---|---|---|
| min_components | 1 | Minimum hidden states to try |
| max_components | 8 | Maximum hidden states to try |
| n_iter | 100 | EM iterations per fit |
How It Works
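During fit(), the model sweeps the number of hidden states from min_components to max_components (inclusive), training a CategoricalHMM for each value. The best model is selected by validation accuracy; when no validation data is passed to fit(), accuracy on the training data is used instead.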
Usage
from pitch_sequencing import get_model
# "hmm" is the registry name; the config bounds the hidden-state sweep (1 through 8 here).
model = get_model("hmm", {"min_components": 1, "max_components": 8})
# X_val is optional: without it, candidates are scored on the training data instead.
model.fit(X_train, y_train, X_val=X_val, y_val=y_val)
predictions = model.predict(X_test)
API Reference
pitch_sequencing.models.hmm_model.HMMModel
Bases: BaseModel
Hidden Markov Model for pitch sequence prediction.
Source code in src/pitch_sequencing/models/hmm_model.py
class HMMModel(BaseModel):
    """Hidden Markov Model for pitch sequence prediction.

    Wraps hmmlearn's ``CategoricalHMM``. During ``fit`` the number of hidden
    states is swept from ``min_components`` to ``max_components`` (inclusive)
    and the best-scoring candidate is kept.
    """

    def __init__(self, config=None):
        """Read hyperparameters from ``config``, falling back to defaults.

        Args:
            config: Optional mapping that may contain ``min_components``
                (default 1), ``max_components`` (default 8) and ``n_iter``
                (default 100). ``None`` or an empty mapping uses all defaults.
        """
        config = config or {}
        self.min_components = config.get("min_components", 1)
        self.max_components = config.get("max_components", 8)
        self.n_iter = config.get("n_iter", 100)
        # Best fitted candidate and its state count; both stay None until fit().
        self._model = None
        self._best_n = None

    @property
    def name(self) -> str:
        """Human-readable model name."""
        return "HMM"

    @property
    def model_type(self) -> str:
        """Model family identifier."""
        return "sequence"

    def fit(self, X_train, y_train, X_val=None, y_val=None, **kwargs):
        """Train one CategoricalHMM per candidate state count and keep the best.

        Each candidate is fitted on ``X_train`` and scored by the fraction of
        positions where ``model.predict`` equals the observed symbol, measured
        on ``X_val`` when it is given and on ``X_train`` otherwise. The best
        candidate is stored in ``self._model`` and its state count in
        ``self._best_n``; ties keep the smallest state count.

        Args:
            X_train: Encoded pitch types, expected to be a 2D array of shape
                (n_samples, 1) (flat encoding, not windowed).
            y_train: Not read by this method; accepted for interface
                compatibility.
            X_val: Optional held-out data in the same format as ``X_train``.
            y_val: Not read by this method.
            **kwargs: Ignored.

        Raises:
            ValueError: If ``min_components > max_components``, i.e. there is
                no candidate to train.
        """
        from hmmlearn import hmm as hmmlearn_hmm

        candidates = range(self.min_components, self.max_components + 1)
        if not candidates:
            raise ValueError(
                "No candidate state counts to try: "
                f"min_components={self.min_components} > "
                f"max_components={self.max_components}"
            )

        # Score on validation data when available, otherwise on the training data.
        eval_X = X_train if X_val is None else X_val
        actual = np.asarray(eval_X).flatten()

        best_accuracy = None
        best_model = None
        best_n = None
        for n_components in candidates:
            model = hmmlearn_hmm.CategoricalHMM(
                n_components=n_components,
                n_iter=self.n_iter,
                random_state=42,
            )
            model.fit(X_train)

            # NOTE(review): hmmlearn's predict() returns decoded hidden-state
            # indices, so this compares state labels with observed symbols --
            # confirm this is the intended selection metric.
            accuracy = np.mean(model.predict(eval_X) == actual)

            # Always accept the first candidate so a model is selected even
            # when every candidate scores 0.0; strict ">" keeps the earliest
            # (smallest) state count on ties.
            if best_model is None or accuracy > best_accuracy:
                best_accuracy = accuracy
                best_model = model
                best_n = n_components

        self._model = best_model
        self._best_n = best_n

    def _require_fitted(self):
        """Return the fitted model or raise if ``fit`` has not been called."""
        if self._model is None:
            raise RuntimeError("HMMModel is not fitted yet; call fit() first.")
        return self._model

    def predict(self, X) -> np.ndarray:
        """Return the fitted model's ``predict`` output for ``X``.

        Raises:
            RuntimeError: If the model has not been fitted.
        """
        return self._require_fitted().predict(X)

    def predict_proba(self, X) -> np.ndarray:
        """Return emission probabilities for each sample given predicted state.

        Each output row is the row of ``emissionprob_`` selected by the state
        predicted for that sample.

        Raises:
            RuntimeError: If the model has not been fitted.
        """
        model = self._require_fitted()
        states = model.predict(X)
        return model.emissionprob_[states]

    def get_params(self) -> dict:
        """Return the configured hyperparameters and the selected state count.

        ``n_components`` is ``None`` until ``fit`` has selected a model.
        """
        return {
            "n_components": self._best_n,
            "min_components": self.min_components,
            "max_components": self.max_components,
            "n_iter": self.n_iter,
        }
|
fit(X_train, y_train, X_val=None, y_val=None, **kwargs)
Train HMM by sweeping n_components and picking best by validation accuracy.
For HMM, X_train is expected to be a flat 2D array of shape (n_samples, 1)
with encoded pitch types (the HMM uses its own flat encoding, not windowed).
y_train is the same flat array (self-supervised next-token prediction).
Source code in src/pitch_sequencing/models/hmm_model.py
def fit(self, X_train, y_train, X_val=None, y_val=None, **kwargs):
    """Train one CategoricalHMM per candidate state count and keep the best.

    Each candidate is fitted on ``X_train`` and scored by the fraction of
    positions where ``model.predict`` equals the observed symbol, measured on
    ``X_val`` when it is given and on ``X_train`` otherwise. The best candidate
    is stored in ``self._model`` and its state count in ``self._best_n``; ties
    keep the smallest state count.

    Args:
        X_train: Encoded pitch types, expected to be a 2D array of shape
            (n_samples, 1) (flat encoding, not windowed).
        y_train: Not read by this method; accepted for interface compatibility.
        X_val: Optional held-out data in the same format as ``X_train``.
        y_val: Not read by this method.
        **kwargs: Ignored.

    Raises:
        ValueError: If ``min_components > max_components``, i.e. there is no
            candidate to train.
    """
    from hmmlearn import hmm as hmmlearn_hmm

    candidates = range(self.min_components, self.max_components + 1)
    if not candidates:
        raise ValueError(
            "No candidate state counts to try: "
            f"min_components={self.min_components} > "
            f"max_components={self.max_components}"
        )

    # Score on validation data when available, otherwise on the training data.
    eval_X = X_train if X_val is None else X_val
    actual = np.asarray(eval_X).flatten()

    best_accuracy = None
    best_model = None
    best_n = None
    for n_components in candidates:
        model = hmmlearn_hmm.CategoricalHMM(
            n_components=n_components,
            n_iter=self.n_iter,
            random_state=42,
        )
        model.fit(X_train)

        # NOTE(review): hmmlearn's predict() returns decoded hidden-state
        # indices, so this compares state labels with observed symbols --
        # confirm this is the intended selection metric.
        accuracy = np.mean(model.predict(eval_X) == actual)

        # Always accept the first candidate so a model is selected even when
        # every candidate scores 0.0; strict ">" keeps the earliest (smallest)
        # state count on ties.
        if best_model is None or accuracy > best_accuracy:
            best_accuracy = accuracy
            best_model = model
            best_n = n_components

    self._model = best_model
    self._best_n = best_n
|
predict_proba(X)
Return emission probabilities for each sample given predicted state.
Source code in src/pitch_sequencing/models/hmm_model.py
def predict_proba(self, X) -> np.ndarray:
    """Return emission probabilities for each sample given predicted state.

    Each output row is the row of the fitted model's ``emissionprob_`` selected
    by the state predicted for that sample.

    Args:
        X: Input passed unchanged to the fitted model's ``predict``.

    Returns:
        Array with one row of ``emissionprob_`` per predicted state.

    Raises:
        RuntimeError: If the model has not been fitted (``self._model`` is None).
    """
    if self._model is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise RuntimeError("HMMModel is not fitted yet; call fit() first.")
    states = self._model.predict(X)
    emission = self._model.emissionprob_
    return emission[states]
|