svgp_trainer

SparseGPTrainer

Bases: BaseTrainer

Trains an SVGP model using specified parameters and early stopping.

Attributes:

| Name | Type | Description |
| ---- | ---- | ----------- |
| num_inducing_points | int | Number of inducing points for the SVGP. |
| model | SVGP | The Stochastic Variational Gaussian Process model. |
| likelihood | gpytorch.likelihoods.GaussianLikelihood | The likelihood of the model. |

Parameters:

| Name | Type | Description | Default |
| ---- | ---- | ----------- | ------- |
| X | array-like | The input features. | required |
| y | array-like | The target outputs. | required |
| num_inducing_points | int | Number of inducing points to use in the SVGP model. | 100 |
| sample_weights | array-like | Sample weights for each data point. | None |
| test_size | float | Fraction of the dataset to be used as test data. | 0.2 |
| random_state | int | Random seed for reproducible results. | 42 |
| num_epochs | int | Maximum number of training epochs. | 50 |
| batch_size | int | Batch size for training. | 256 |
| optimizer_fn_name | str | Name of the optimizer to use. | "Adam" |
| lr | float | Learning rate for the optimizer. | 0.01 |
| use_scheduler | bool | Whether to use a learning rate scheduler. | False |
| patience | int | Number of epochs with no improvement before stopping training. | 10 |
| dtype | torch.dtype | The dtype to use for input tensors. | torch.float32 |
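
For orientation, here is a minimal usage sketch. The class name and constructor parameters are taken from the table above, and the import path follows the source location shown below; the synthetic data and hyperparameter values are illustrative assumptions.

```python
import numpy as np
from uncertaintyplayground.trainers.svgp_trainer import SparseGPTrainer

# Synthetic regression data (illustrative only)
rng = np.random.default_rng(42)
X = rng.normal(size=(1000, 5)).astype(np.float32)
y = (2.0 * X[:, 0] + np.sin(X[:, 1]) + rng.normal(scale=0.1, size=1000)).astype(np.float32)

# Keyword arguments mirror the parameter table above
trainer = SparseGPTrainer(
    X,
    y,
    num_inducing_points=100,
    num_epochs=50,
    batch_size=256,
    lr=0.01,
    patience=10,
)
trainer.train()
```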
Source code in uncertaintyplayground/trainers/svgp_trainer.py
class SparseGPTrainer(BaseTrainer):
    """
    Trains an SVGP model using specified parameters and early stopping.

    Attributes:
        num_inducing_points (int): Number of inducing points for the SVGP.
        model (SVGP): The Stochastic Variational Gaussian Process model.
        likelihood (gpytorch.likelihoods.GaussianLikelihood): The likelihood of the model.

    Args:
        X (array-like): The input features.
        y (array-like): The target outputs.
        num_inducing_points (int): Number of inducing points to use in the SVGP model.
        sample_weights (array-like, optional): Sample weights for each data point. Defaults to None.
        test_size (float, optional): Fraction of the dataset to be used as test data. Defaults to 0.2.
        random_state (int, optional): Random seed for reproducible results. Defaults to 42.
        num_epochs (int, optional): Maximum number of training epochs. Defaults to 50.
        batch_size (int, optional): Batch size for training. Defaults to 256.
        optimizer_fn_name (str, optional): Name of the optimizer to use. Defaults to "Adam".
        lr (float, optional): Learning rate for the optimizer. Defaults to 0.01.
        use_scheduler (bool, optional): Whether to use a learning rate scheduler. Defaults to False.
        patience (int, optional): Number of epochs with no improvement before stopping training. Defaults to 10.
        dtype (torch.dtype, optional): The dtype to use for input tensors. Defaults to torch.float32.
    """
    def __init__(self, *args, num_inducing_points=100, **kwargs):
        super().__init__(*args, **kwargs)
        self.num_inducing_points = num_inducing_points

        # Initialize the inducing points from the training data and move them to the target device
        inducing_points = self.X_train[:num_inducing_points, :].to(self.device)

        self.model = SVGP(inducing_points, dtype=self.dtype, device=self.device)
        self.model = self.model.to(device=self.device, dtype=self.dtype)  # Ensure the model is on the right device

        self.likelihood = gpytorch.likelihoods.GaussianLikelihood(
            dtype=self.dtype).to(device=self.device, dtype=self.dtype)  # Ensure the likelihood is on the right device

        print(f"Model device: {self.model.device}")
        print(f"Data device: {next(iter(self.train_loader))[0].device}")

    def train(self):
        # set the seed
        torch.manual_seed(self.random_state)

        # define the optimizer & loss function (dynamically)
        optimizer_fn = getattr(torch.optim, self.optimizer_fn_name)
        optimizer = optimizer_fn(self.model.parameters(), lr=self.lr)

        # Define the learning rate scheduler if use_scheduler is True
        if self.use_scheduler:
            scheduler = torch.optim.lr_scheduler.OneCycleLR(
                optimizer, max_lr=5, epochs=self.num_epochs, steps_per_epoch=len(self.train_loader))

        # define the loss function
        mll = gpytorch.mlls.VariationalELBO(
            self.likelihood, self.model, num_data=self.X_train.shape[0])

        # Early stopping parameters
        early_stopping = EarlyStopping(
            patience=self.patience, compare_fn=lambda x, y: x < y)

        # Initiate the model training mode
        self.model.train()
        self.likelihood.train()

        for i in range(self.num_epochs):
            for X_batch, y_batch, weights_batch in self.train_loader:
                # Move tensors to the chosen device
                X_batch = X_batch.to(device=self.device, dtype=self.dtype, non_blocking=True)
                y_batch = y_batch.to(device=self.device, dtype=self.dtype, non_blocking=True)
                weights_batch = weights_batch.to(device=self.device, dtype=self.dtype, non_blocking=True)
                optimizer.zero_grad()
                output = self.model(X_batch)
                unweighted_loss = -mll(output, y_batch)

                # Apply sample weights
                weighted_loss = torch.mean(unweighted_loss * weights_batch)

                weighted_loss.backward()

                optimizer.step()

                # The scheduler steps after the optimizer, once per batch
                if self.use_scheduler:
                    scheduler.step()

            # Compute validation metrics (MSE and R2)
            self.model.eval()
            self.likelihood.eval()

            with torch.no_grad(), gpytorch.settings.fast_pred_var():
                raw_output = self.model(self.X_val.to(self.device))
                y_pred_val = self.likelihood(raw_output).mean

            y_true_val = self.y_val.detach().cpu().numpy()
            y_pred_val = y_pred_val.detach().cpu().numpy()

            mse_val = mean_squared_error(y_true_val, y_pred_val)
            r2_val = r2_score(y_true_val, y_pred_val)

            self.model.train()
            self.likelihood.train()

            print(
                f"Epoch {i + 1}/{self.num_epochs}, Weighted Loss: {weighted_loss.item():.3f}, Val MSE: {mse_val:.6f}, Val R2: {r2_val:.3f}")

            should_stop = early_stopping(mse_val, self.model)

            if should_stop:
                print(f"Early stopping after {i + 1} epochs")
                break

        if early_stopping.best_model_state is not None:
            self.model.load_state_dict(early_stopping.best_model_state)
            self.model.eval()
            self.likelihood.eval()

    def predict_with_uncertainty(self, X):
        """
        Predicts the mean and variance of the output distribution given input tensor X.

        Args:
            X (tensor): Input tensor of shape (num_samples, num_features).

        Returns:
            tuple: A tuple of the mean and variance of the output distribution, both of shape (num_samples,).
        """
        self.model.eval()
        self.likelihood.eval()

        # Convert numpy array to PyTorch tensor if necessary
        if isinstance(X, np.ndarray):
            X = torch.from_numpy(X).to(device=self.device, dtype=self.dtype)

        # Check if X is a single instance and add an extra dimension if necessary
        if X.ndim == 1:
            X = torch.unsqueeze(X, 0)

        with torch.no_grad(), gpytorch.settings.fast_pred_var():
            # Get the predictive mean and variance
            preds = self.likelihood(self.model(X))
            mean = preds.mean.detach().cpu().numpy()
            variance = preds.variance.detach().cpu().numpy()

        return mean, variance
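
The `SVGP` model class used above is imported from elsewhere in the package, so its definition does not appear on this page. For orientation, a stochastic variational GP model in GPyTorch typically follows the pattern sketched below; this is the standard GPyTorch idiom, not the package's actual implementation, and the real class's `dtype`/`device` handling is omitted.

```python
import gpytorch

class MinimalSVGP(gpytorch.models.ApproximateGP):
    """Sketch of a stochastic variational GP in the standard GPyTorch style."""

    def __init__(self, inducing_points):
        # One variational parameter set per inducing point
        variational_distribution = gpytorch.variational.CholeskyVariationalDistribution(
            inducing_points.size(0)
        )
        # The strategy marginalizes over the inducing values; locations are learned
        variational_strategy = gpytorch.variational.VariationalStrategy(
            self, inducing_points, variational_distribution, learn_inducing_locations=True
        )
        super().__init__(variational_strategy)
        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel())

    def forward(self, x):
        # Return the GP prior at x; the variational strategy produces the posterior
        return gpytorch.distributions.MultivariateNormal(
            self.mean_module(x), self.covar_module(x)
        )
```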

predict_with_uncertainty(X)

Predicts the mean and variance of the output distribution given input tensor X.

Parameters:

| Name | Type | Description | Default |
| ---- | ---- | ----------- | ------- |
| X | tensor | Input tensor of shape (num_samples, num_features). | required |

Returns:

| Type | Description |
| ---- | ----------- |
| tuple | A tuple of the mean and variance of the output distribution, both of shape (num_samples,). |
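
As a usage sketch, the returned mean and variance can be combined into a Gaussian predictive interval. Here `trainer` is assumed to be a trained `SparseGPTrainer` (as in the earlier snippet) and `X_new` a hypothetical input array; the interval construction is an illustration, not part of the API.

```python
import numpy as np

# X_new: array of shape (num_samples, num_features)
mean, variance = trainer.predict_with_uncertainty(X_new)

# 95% predictive interval under the Gaussian likelihood
std = np.sqrt(variance)
lower, upper = mean - 1.96 * std, mean + 1.96 * std
```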
