Transformation Comparison: Dropout vs DropConnect vs Ensemble vs Bayesian vs Evidential (PyTorch)

Date: 2026-01-29

Audience: Users who already looked at the individual transformation notebooks. Framework: PyTorch. Goal: Compare predictive uncertainty behavior on the same 1D regression setup.

What this notebook does

  • trains the same base MLP under different probly.transformation methods

  • compares RMSE (against noise-free ground truth)

  • compares NLL in-domain vs OOD

  • plots predictive mean ± 2 std

  • for evidential: decomposes aleatoric vs epistemic

What this notebook does NOT do

  • does not claim best hyperparameters or best performance

  • is not a benchmark, just a reproducible sanity-check style comparison

import math

import matplotlib.pyplot as plt
import numpy as np
import torch

from probly.transformation.bayesian import bayesian
from probly.transformation.dropconnect import dropconnect
from probly.transformation.dropout import dropout
from probly.transformation.ensemble import ensemble
from probly.transformation.evidential.regression import evidential_regression
# Fix RNG seeds so data generation and weight init are reproducible.
torch.manual_seed(0)
np.random.seed(0)

# Everything in this notebook runs on CPU.
device = torch.device("cpu")
device
device(type='cpu')
# Shared experiment configuration used by every method below.
CFG = {
    "seed": 0,  # base RNG seed (seeding itself is done above)
    "epochs": 500,  # training epochs for dropout / dropconnect / bayesian
    "mc_samples": 75,  # stochastic forward passes for MC uncertainty estimates
    "ensemble_members": 3,  # number of independently trained ensemble members
    "ensemble_epochs": 500,  # joint training epochs for the ensemble
    "evidential_epochs": 800,  # separate budget for the evidential model
    "verbose_every": 0,  # 0 disables per-epoch loss printing
}
from collections.abc import Callable

from torch import nn


def make_torch_regression_model_1d() -> nn.Module:
    """Build the small base MLP shared by every transformation in this tutorial.

    Maps 2 input features ([x, x^2]) through one hidden ReLU layer to a
    single scalar output.
    """
    layers = [
        nn.Linear(2, 2),
        nn.ReLU(),
        nn.Linear(2, 1),
    ]
    return nn.Sequential(*layers)


def fallback_regression_model_1d() -> nn.Module:
    """Fallback factory used when fixtures are unavailable.

    Delegates to make_torch_regression_model_1d so both code paths build
    the exact same architecture and results stay comparable.
    """
    model = make_torch_regression_model_1d()
    return model


# Factory used by every section below to create a fresh base model.
MAKE_BASE: Callable[[], nn.Module] = make_torch_regression_model_1d
print("Using base model factory: make_torch_regression_model_1d")
Using base model factory: make_torch_regression_model_1d
def make_dataset(
    n_train: int = 256,
    n_test: int = 400,
    train_range: float = 4.0,
    test_range: float = 6.0,
    noise_var: float = 3.0,
) -> tuple[
    tuple[torch.Tensor, torch.Tensor, torch.Tensor],
    tuple[torch.Tensor, torch.Tensor, torch.Tensor],
    float,
]:
    """Generate a simple 1D regression dataset with 2D features [x, x^2].

    Returns ((train_features, noisy_train_targets, train_x),
    (test_features, noise-free_test_targets, test_x), noise_std).
    """
    noise_std = math.sqrt(noise_var)

    # Training inputs uniform in [-train_range, train_range]; test inputs on
    # an evenly spaced grid over the wider [-test_range, test_range].
    x_train = torch.rand(n_train, 1) * 2 * train_range - train_range
    x_test = torch.linspace(-test_range, test_range, n_test).unsqueeze(1)

    def lift(x: torch.Tensor) -> torch.Tensor:
        # Feature map [x, x^2] fed to the 2-input MLP.
        return torch.cat([x, x**2], dim=1)

    noisy_targets = x_train**3 + noise_std * torch.randn_like(x_train)
    clean_targets = x_test**3  # noise-free "true function" for plotting

    train_split = (lift(x_train), noisy_targets, x_train)
    test_split = (lift(x_test), clean_targets, x_test)
    return train_split, test_split, noise_std


# Build the dataset and move all tensors to the target device.
(train_x, train_y, train_x1), (test_x, test_y_true, test_x1), noise_std = make_dataset()
train_x, train_y, test_x, test_y_true = [t.to(device) for t in [train_x, train_y, test_x, test_y_true]]

train_x1 = train_x1.to(device)
test_x1 = test_x1.to(device)

# Noisy test observations used for NLL; test_y_true stays noise-free for RMSE.
test_y = test_y_true + noise_std * torch.randn_like(test_y_true)

# Masks splitting the test grid into in-distribution (inside the observed
# training x-range) and OOD (extrapolation) regions.
x_min, x_max = train_x1.min(), train_x1.max()
in_mask = (test_x1 >= x_min) & (test_x1 <= x_max)
ood_mask = ~in_mask

# Observation noise is known by construction; reused later as the aleatoric term.
known_noise_std = noise_std

noise_std
1.7320508075688772

Dataset design

We use a simple 1D regression with known noise:

  • True function: y = x³

  • Training inputs: x ∈ [-4, 4]

  • Test inputs: x ∈ [-6, 6] (includes extrapolation regions)

  • Observation noise: ε ~ Normal(0, σ²)

Why this setup:

  • Inside [-4,4], models should fit well and uncertainty should be moderate.

  • Outside [-4,4], epistemic uncertainty should usually increase (less evidence).

  • Because we know σ, we can separate:

    • epistemic (model uncertainty estimated via sampling/disagreement)

    • aleatoric (data noise, fixed by construction)

# Visualize the noisy training data against the noise-free target function.
plt.figure(figsize=(6, 4))
plt.scatter(train_x1.cpu().numpy(), train_y.cpu().numpy(), s=10, alpha=0.5, label="train noisy")
plt.plot(test_x1.cpu().numpy(), test_y_true.cpu().numpy(), label="true function")
plt.title("Toy regression: y = x^3 + noise (train in [-4,4], test in [-6,6])")
plt.legend()
plt.show()
../../_images/b58811045b9b9620184fbf5fa89fdcbaa8aed1994b3166009712a0e3dc8a7e6a.png

Uncertainty & evaluation setup (important)

In this notebook, epistemic uncertainty for dropout / dropconnect / ensemble / Bayesian methods is estimated via repeated stochastic forward passes (MC samples). Observation noise is assumed to be known and homoskedastic Gaussian noise from the data generation process, denoted as known_noise_std.

Therefore, we form the total predictive uncertainty as:

total_std = sqrt(epistemic_std**2 + known_noise_std**2)

For evaluation, RMSE is computed against the noise-free target test_y_true (function approximation quality), while NLL is computed against the noisy observations test_y (distributional calibration). We additionally report NLL separately for in-distribution (within the training x-range) and OOD (outside the training x-range).

from torch import Tensor


@torch.no_grad()
def mc_predict(
    model: nn.Module,
    x: Tensor,
    num_samples: int = 75,
    force_train_mode: bool = False,
) -> tuple[Tensor, Tensor]:
    """Monte-Carlo predictive mean and std via repeated forward passes.

    Temporarily puts the model in train mode when force_train_mode is True
    (keeps stochastic layers such as dropout active during prediction) and
    restores the original mode before returning.
    """
    previous_mode = model.training
    model.train(mode=force_train_mode)

    samples = torch.stack([model(x) for _ in range(num_samples)], dim=0)
    mean = samples.mean(dim=0)
    std = samples.std(dim=0, unbiased=False)  # population std over samples

    model.train(mode=previous_mode)
    return mean, std


def train_mse(
    model: torch.nn.Module,
    x: torch.Tensor,
    y: torch.Tensor,
    epochs: int = 500,
    lr: float = 1e-2,
    weight_decay: float = 0.0,
    verbose_every: int = 300,
) -> torch.nn.Module:
    """Train a model with MSE loss on (x, y).

    Full-batch Adam; the model is first moved to the module-level `device`.
    Prints the loss every `verbose_every` epochs (0 disables logging).
    Returns the trained model.
    """
    model.to(device)
    model.train()

    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    criterion = torch.nn.MSELoss()

    for epoch in range(1, epochs + 1):
        optimizer.zero_grad()
        loss = criterion(model(x), y)
        loss.backward()
        optimizer.step()

        should_log = verbose_every and epoch % verbose_every == 0
        if should_log:
            print(f"epoch {epoch:4d} | mse {loss.item():.4f}")

    return model


def gaussian_nll(
    y: torch.Tensor,
    mean: torch.Tensor,
    std: torch.Tensor,
) -> torch.Tensor:
    """Pointwise Gaussian negative log-likelihood.

    A small epsilon (1e-8) is added to the variance to guard against a
    zero predicted std.
    """
    var = std.pow(2) + 1e-8
    log_term = 0.5 * torch.log(2 * math.pi * var)
    quad_term = 0.5 * (y - mean).pow(2) / var
    return log_term + quad_term


def mean_nll(
    y: torch.Tensor,
    mean: torch.Tensor,
    std: torch.Tensor,
    mask: torch.Tensor | None = None,
) -> float:
    """Average Gaussian NLL, optionally restricted to a boolean mask."""
    pointwise = gaussian_nll(y, mean, std)
    selected = pointwise if mask is None else pointwise[mask]
    return selected.mean().item()


@torch.no_grad()
def rmse(y: torch.Tensor, mean: torch.Tensor) -> float:
    """Root mean squared error between targets y and predictions mean."""
    squared_error = (y - mean).pow(2)
    return squared_error.mean().sqrt().item()

Predictive uncertainty via sampling

Some transformations are stochastic at inference:

  • dropout / dropconnect: randomness comes from masks

  • bayesian: randomness comes from weight sampling (implementation-dependent)

  • ensemble: randomness comes from multiple trained members (no repeated sampling needed)

For these methods we estimate:

  • Predictive mean: average over samples/models

  • Epistemic std: standard deviation over samples/models

Because our base models output only a single scalar (no explicit noise head), we form a total predictive std using:

total_std = sqrt(epistemic_std² + noise_std²)

This gives a reasonable predictive distribution for NLL evaluation.

# Short alias for torch.distributions; used to build StudentT below.
dist = torch.distributions


def evidential_studentt_params(
    out_dict: dict[str, torch.Tensor],
) -> tuple[
    torch.Tensor,
    torch.Tensor,
    torch.Tensor,
    torch.Tensor,
    torch.Tensor,
    torch.Tensor,
    torch.Tensor,
]:
    """Map NIG parameters (gamma, nu, alpha, beta) to Student-t parameters.

    Returns (gamma, nu, alpha, beta, df, loc, scale), where the predictive
    Student-t has df = 2*alpha, loc = gamma and
    scale = sqrt(beta * (1 + nu) / (nu * alpha)).
    """
    gamma, nu = out_dict["gamma"], out_dict["nu"]
    alpha, beta = out_dict["alpha"], out_dict["beta"]

    degrees_of_freedom = 2.0 * alpha
    predictive_var = beta * (1.0 + nu) / (nu * alpha)
    # Epsilon keeps the sqrt well-defined when the variance underflows.
    predictive_scale = torch.sqrt(predictive_var + 1e-8)

    return gamma, nu, alpha, beta, degrees_of_freedom, gamma, predictive_scale


def evidential_loss(
    out_dict: dict[str, torch.Tensor],
    y: torch.Tensor,
    lam: float = 0.01,
) -> tuple[torch.Tensor, float, float]:
    """Evidential regression loss: Student-t NLL plus evidence regularizer.

    The regularizer penalizes high total evidence (2*nu + alpha) on points
    with large absolute error. Returns (total_loss, nll_value, reg_value).
    """
    gamma, nu, alpha, _beta, df, loc, scale = evidential_studentt_params(out_dict)
    predictive = dist.StudentT(df=df, loc=loc, scale=scale)

    nll = -predictive.log_prob(y).mean()
    total_evidence = 2.0 * nu + alpha
    reg = (torch.abs(y - gamma) * total_evidence).mean()

    total = nll + lam * reg
    return total, nll.item(), reg.item()


@torch.no_grad()
def evidential_decompose(
    out_dict: dict[str, torch.Tensor],
) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
    """Split an evidential output into (mean, aleatoric, epistemic, total) stds.

    Uses the analytic NIG decomposition: aleatoric var = beta/(alpha-1) and
    epistemic var = beta/(nu*(alpha-1)); small epsilons stabilize the
    divisions and square roots.
    """
    gamma, nu = out_dict["gamma"], out_dict["nu"]
    alpha, beta = out_dict["alpha"], out_dict["beta"]

    denom = alpha - 1.0 + 1e-8
    ale_var = beta / denom
    epi_var = beta / (nu * denom)

    ale_std = torch.sqrt(ale_var + 1e-8)
    epi_std = torch.sqrt(epi_var + 1e-8)
    total_std = torch.sqrt(ale_var + epi_var + 1e-8)

    return gamma, ale_std, epi_std, total_std

Train + Evaluate: Dropout

MC sampling at inference (keep train mode during prediction).

Dropout (MC Dropout)

Training is standard MSE. At evaluation we keep the model in train() mode so dropout stays active, then run many forward passes. Variation across predictions approximates epistemic uncertainty.

# Wrap a fresh base model with the dropout transformation and train with MSE.
base = MAKE_BASE().to(device)
m_dropout = dropout(base, p=0.1).to(device)

m_dropout = train_mse(m_dropout, train_x, train_y, epochs=CFG["epochs"], lr=1e-2, verbose_every=CFG["verbose_every"])

# MC sampling (force train mode so dropout is active)
mean_do, std_do = mc_predict(m_dropout, test_x, num_samples=CFG["mc_samples"], force_train_mode=True)

# Total std: epistemic from MC + known aleatoric (noise_std)
total_std_do = torch.sqrt(std_do**2 + noise_std**2)

# NLL against noisy observations, split by in-distribution vs OOD masks.
nll_do_in = mean_nll(test_y, mean_do, total_std_do, in_mask)
nll_do_ood = mean_nll(test_y, mean_do, total_std_do, ood_mask)

# RMSE against the noise-free true function.
rmse_do = rmse(test_y_true, mean_do)

print("Dropout | RMSE:", rmse_do, "| NLL_in:", nll_do_in, "| NLL_ood:", nll_do_ood)
Dropout | RMSE: 66.88179016113281 | NLL_in: 40.215293884277344 | NLL_ood: 322.47161865234375

Train + Evaluate: DropConnect

MC sampling at inference (keep train mode during prediction).

DropConnect

Same idea as dropout, but the randomness affects connections/weights rather than activations (depending on implementation). We again use multiple forward passes to estimate epistemic uncertainty.

# Same pipeline as dropout, but with the dropconnect transformation.
base = MAKE_BASE().to(device)
m_dc = dropconnect(base, p=0.1).to(device)

m_dc = train_mse(m_dc, train_x, train_y, epochs=CFG["epochs"], lr=1e-2, verbose_every=CFG["verbose_every"])

# MC sampling in train mode keeps the weight masks stochastic at inference.
mean_dc, std_dc = mc_predict(m_dc, test_x, num_samples=CFG["mc_samples"], force_train_mode=True)
# Total std: MC epistemic + known aleatoric noise.
total_std_dc = torch.sqrt(std_dc**2 + noise_std**2)

nll_dc_in = mean_nll(test_y, mean_dc, total_std_dc, in_mask)
nll_dc_ood = mean_nll(test_y, mean_dc, total_std_dc, ood_mask)

rmse_dc = rmse(test_y_true, mean_dc)

print("DropConnect | RMSE:", rmse_dc, "| NLL_in:", nll_dc_in, "| NLL_ood:", nll_dc_ood)
DropConnect | RMSE: 66.45568084716797 | NLL_in: 37.09754180908203 | NLL_ood: 437.0319519042969

Train + Evaluate: Ensemble

No MC sampling: variance comes from member disagreement.

Ensemble

probly.transformation.ensemble(...) returns a torch.nn.ModuleList, i.e. multiple independent members.

Key consequence:

  • You cannot do members(X) because ModuleList has no forward.

  • You must run each member separately, then aggregate.

Uncertainty comes from member disagreement:

  • mean over members = predictive mean

  • std over members = epistemic uncertainty estimate

from collections.abc import Sequence

# ensemble(...) returns a ModuleList of independently initialized members.
base = MAKE_BASE().to(device)
members = ensemble(base, num_members=CFG["ensemble_members"], reset_params=True)

for mm in members:
    mm.to(device)

# One optimizer over all member parameters; averaging the per-member MSE
# keeps each member's gradients independent of the others.
opt = torch.optim.Adam([p for mm in members for p in mm.parameters()], lr=1e-2)
loss_fn = torch.nn.MSELoss()

for ep in range(1, CFG["ensemble_epochs"] + 1):
    opt.zero_grad()

    member_losses = [loss_fn(mm(train_x), train_y) for mm in members]
    loss = torch.stack(member_losses).mean()

    loss.backward()
    opt.step()

    if CFG["verbose_every"] and ep % CFG["verbose_every"] == 0:
        print(f"epoch {ep:4d} | mse {loss.item():.4f}")

@torch.no_grad()
def ensemble_predict(
    members: Sequence[torch.nn.Module],
    x: torch.Tensor,
) -> tuple[torch.Tensor, torch.Tensor]:
    """Predictive mean and std across ensemble members.

    Switches every member to eval mode, stacks their forward passes, and
    reports the population std (unbiased=False) as the disagreement.
    """
    for member in members:
        member.eval()

    stacked = torch.stack([member(x) for member in members], dim=0)  # [M, N, 1]
    return stacked.mean(dim=0), stacked.std(dim=0, unbiased=False)


# Member disagreement gives epistemic std; add known aleatoric noise for total.
mean_ens, std_ens = ensemble_predict(members, test_x)
total_std_ens = torch.sqrt(std_ens**2 + noise_std**2)

nll_ens_in = mean_nll(test_y, mean_ens, total_std_ens, in_mask)
nll_ens_ood = mean_nll(test_y, mean_ens, total_std_ens, ood_mask)

rmse_ens = rmse(test_y_true, mean_ens)

print("Ensemble | RMSE:", rmse_ens, "| NLL_in:", nll_ens_in, "| NLL_ood:", nll_ens_ood)
Ensemble | RMSE: 54.49751281738281 | NLL_in: 2.9756195545196533 | NLL_ood: 6.665450572967529

Train + Evaluate: Bayesian

We treat the transformed model as stochastic and MC-sample it at inference. (Training is plain MSE here, because this notebook is about using the ProBly transformation API.)

Bayesian transformation (practical note)

In a “full Bayesian” setup you might train with an ELBO/KL term. Here we deliberately keep training simple (MSE), because the goal of this notebook is to demonstrate:

  • the ProBly API usage

  • the stochastic behavior at inference

  • uncertainty-aware evaluation

We still use MC sampling at inference to obtain a predictive distribution.

# Bayesian transformation trained with plain MSE (no ELBO/KL term here,
# by design of this notebook -- see the section text above).
base = MAKE_BASE().to(device)
m_bayes = bayesian(base).to(device)

m_bayes = train_mse(m_bayes, train_x, train_y, epochs=CFG["epochs"], lr=1e-2, verbose_every=CFG["verbose_every"])

# MC sampling: even in eval it may be stochastic, but we'll just sample.
mean_b, std_b = mc_predict(m_bayes, test_x, num_samples=CFG["mc_samples"], force_train_mode=False)
# Total std: weight-sampling epistemic + known aleatoric noise.
total_std_b = torch.sqrt(std_b**2 + noise_std**2)

nll_b_in = mean_nll(test_y, mean_b, total_std_b, in_mask)
nll_b_ood = mean_nll(test_y, mean_b, total_std_b, ood_mask)

rmse_b = rmse(test_y_true, mean_b)

print("Bayesian | RMSE:", rmse_b, "| NLL_in:", nll_b_in, "| NLL_ood:", nll_b_ood)
Bayesian | RMSE: 65.43173217773438 | NLL_in: 37.11834716796875 | NLL_ood: 1430.4635009765625
# Sanity check: confirm the bayesian model is actually stochastic at inference
# by measuring spread across 10 repeated forward passes.
with torch.no_grad():
    s = torch.stack([m_bayes(test_x) for _ in range(10)], dim=0).std().mean().item()
print("bayes sample std mean:", s)
bayes sample std mean: 29.752193450927734

Train + Evaluate: Evidential Regression

Single forward gives distribution parameters + analytic decomposition.

Evidential Regression

Evidential regression returns a dict of distribution parameters:

  • gamma: mean-like location

  • nu, alpha, beta: evidence/shape parameters

Unlike sampling-based methods, evidential regression can provide an analytic decomposition:

  • aleatoric uncertainty ~ beta / (alpha - 1)

  • epistemic uncertainty ~ beta / (nu * (alpha - 1))

We compute:

  • Student-t NLL (faithful to evidential predictive distribution)

  • Optional Gaussian-approx NLL (for easier comparison with other methods)

# Evidential regression: the head outputs NIG parameters instead of a scalar.
base = MAKE_BASE().to(device)
m_evi = evidential_regression(base).to(device)

m_evi.train()
opt = torch.optim.Adam(m_evi.parameters(), lr=1e-3)

# Train with the Student-t NLL + evidence regularizer defined above.
for ep in range(1, CFG["evidential_epochs"] + 1):
    opt.zero_grad()
    out = m_evi(train_x)  # dict: gamma, nu, alpha, beta
    loss, nll_val, reg_val = evidential_loss(out, train_y, lam=0.01)
    loss.backward()
    opt.step()

    if ep % 400 == 0:
        print(f"epoch {ep:4d} | loss {loss.item():.4f} | nll {nll_val:.4f} | reg {reg_val:.4f}")

with torch.no_grad():
    # Single forward pass gives all distribution parameters at once.
    out_test = m_evi(test_x)
    mean_evi, ale_std, epi_std, total_std_evi = evidential_decompose(out_test)

    # StudentT NLL (more faithful for evidential)
    gamma, nu, alpha, beta, df, loc, scale = evidential_studentt_params(out_test)
    tdist = dist.StudentT(df=df, loc=loc, scale=scale)
    nll_point_studentt = -tdist.log_prob(
        test_y,
    )
    # Note: NLL is evaluated on noisy observations (test_y),
    # not the noise-free test_y_true.

    nll_evi_studentt_in = nll_point_studentt[in_mask].mean().item()
    nll_evi_studentt_ood = nll_point_studentt[ood_mask].mean().item()

    # Optional: Gaussian approx NLL (for apples-to-apples)
    nll_evi_gauss_in = mean_nll(test_y, mean_evi, total_std_evi, in_mask)
    nll_evi_gauss_ood = mean_nll(test_y, mean_evi, total_std_evi, ood_mask)

    rmse_evi = rmse(test_y_true, mean_evi)

print(
    "Evidential | RMSE:",
    rmse_evi,
    "| NLL(StudentT)_in:",
    nll_evi_studentt_in,
    "| NLL(StudentT)_ood:",
    nll_evi_studentt_ood,
    "| NLL(Gauss)_in:",
    nll_evi_gauss_in,
    "| NLL(Gauss)_ood:",
    nll_evi_gauss_ood,
)
epoch  400 | loss 5.3736 | nll 5.1058 | reg 26.7803
epoch  800 | loss 4.9962 | nll 4.7552 | reg 24.1011
Evidential | RMSE: 79.96048736572266 | NLL(StudentT)_in: 4.8924336433410645 | NLL(StudentT)_ood: 11.615111351013184 | NLL(Gauss)_in: 19.519474029541016 | NLL(Gauss)_ood: 612.5783081054688
import pandas as pd

# Collect per-method metrics; the evidential row uses its Student-t NLL.
results = [
    ("dropout", rmse_do, nll_do_in, nll_do_ood),
    ("dropconnect", rmse_dc, nll_dc_in, nll_dc_ood),
    ("ensemble", rmse_ens, nll_ens_in, nll_ens_ood),
    ("bayesian", rmse_b, nll_b_in, nll_b_ood),
    ("evidential(StudentT)", rmse_evi, nll_evi_studentt_in, nll_evi_studentt_ood),
]

# Sort by OOD NLL so the best-calibrated-under-extrapolation method is first.
df = pd.DataFrame(results, columns=["method", "rmse", "nll_in", "nll_ood"]).sort_values("nll_ood")

# Normalize any stray 0-d tensors to plain floats before rounding.
for c in ["rmse", "nll_in", "nll_ood"]:
    df[c] = df[c].apply(lambda x: x.item() if hasattr(x, "item") else float(x))

df = df.round({"rmse": 4, "nll_in": 4, "nll_ood": 4})
df
method rmse nll_in nll_ood
2 ensemble 54.4975 2.9756 6.6655
4 evidential(StudentT) 79.9605 4.8924 11.6151
0 dropout 66.8818 40.2153 322.4716
1 dropconnect 66.4557 37.0975 437.0320
3 bayesian 65.4317 37.1183 1430.4635

Interpreting the metrics

  • RMSE: accuracy against the true (noise-free) function y = x³. Lower is better.

  • NLL: evaluates the full predictive distribution. Lower is better. NLL penalizes:

    • wrong means (poor accuracy)

    • overly small predicted uncertainty (overconfidence)

Caveat:

  • For dropout/dropconnect/ensemble/bayesian we use a Gaussian approximation and inject known noise_std.

  • For evidential we report Student-t NLL, which is more faithful to its predictive distribution.

Note: NLL is computed on the noisy observations test_y (not the noise-free test_y_true) and is reported separately for in-distribution (within the training x-range) and OOD (outside it), so it reflects both calibration and how uncertainty behaves under extrapolation, penalizing both under- and over-confidence.

from matplotlib.axes import Axes


def plot_method(
    ax: Axes,
    name: str,
    x: torch.Tensor,
    mean: torch.Tensor,
    std: torch.Tensor,
    color: str | None = None,
) -> None:
    """Plot the predictive mean line and a shaded +/- 2 std band on ax."""
    # Detach and flatten all tensors into 1D numpy arrays for matplotlib.
    xs, mu, sigma = (t.detach().cpu().numpy().flatten() for t in (x, mean, std))

    ax.plot(xs, mu, label=name, color=color)
    ax.fill_between(
        xs,
        mu - 2 * sigma,
        mu + 2 * sigma,
        alpha=0.2,
        color=color,
    )


# One panel per method, all sharing the y-axis for direct visual comparison.
fig, axes = plt.subplots(1, 5, figsize=(22, 4), sharey=True)

# dropout
plot_method(axes[0], "dropout", test_x1, mean_do, total_std_do)
axes[0].scatter(train_x1.cpu().numpy(), train_y.cpu().numpy(), s=8, alpha=0.35)
axes[0].plot(test_x1.cpu().numpy(), test_y_true.cpu().numpy(), linewidth=1)
axes[0].set_title("dropout")

# dropconnect
plot_method(axes[1], "dropconnect", test_x1, mean_dc, total_std_dc)
axes[1].scatter(train_x1.cpu().numpy(), train_y.cpu().numpy(), s=8, alpha=0.35)
axes[1].plot(test_x1.cpu().numpy(), test_y_true.cpu().numpy(), linewidth=1)
axes[1].set_title("dropconnect")

# ensemble
plot_method(axes[2], "ensemble", test_x1, mean_ens, total_std_ens)
axes[2].scatter(train_x1.cpu().numpy(), train_y.cpu().numpy(), s=8, alpha=0.35)
axes[2].plot(test_x1.cpu().numpy(), test_y_true.cpu().numpy(), linewidth=1)
axes[2].set_title("ensemble")

# bayesian
plot_method(axes[3], "bayesian", test_x1, mean_b, total_std_b)
axes[3].scatter(train_x1.cpu().numpy(), train_y.cpu().numpy(), s=8, alpha=0.35)
axes[3].plot(test_x1.cpu().numpy(), test_y_true.cpu().numpy(), linewidth=1)
axes[3].set_title("bayesian")

# evidential
plot_method(axes[4], "evidential", test_x1, mean_evi, total_std_evi)
axes[4].scatter(train_x1.cpu().numpy(), train_y.cpu().numpy(), s=8, alpha=0.35)
axes[4].plot(test_x1.cpu().numpy(), test_y_true.cpu().numpy(), linewidth=1)
axes[4].set_title("evidential")

# Clamp every panel to the full test range, including the OOD regions.
for ax in axes:
    ax.set_xlim(-6, 6)

plt.suptitle("Predictive mean ± 2 std (total uncertainty)")
plt.show()
../../_images/1a5b5eeb5ec148dbb1b1319f7495ea396f5a9ee55787072e167db884057ecb34.png

Reading the plots (mean ± 2 std)

  • The line is the predictive mean.

  • The shaded band is ± 2 standard deviations (a rough uncertainty interval).

  • On extrapolation regions (outside [-4,4]), good uncertainty methods often widen the band.

  • Inside [-4,4], methods should fit the data and not inflate uncertainty too much.

# Evidential-only plot: analytic aleatoric / epistemic / total std over x.
plt.figure(figsize=(7, 4))
plt.plot(test_x1.cpu().numpy(), ale_std.cpu().numpy(), label="aleatoric std")
plt.plot(test_x1.cpu().numpy(), epi_std.cpu().numpy(), label="epistemic std")
plt.plot(test_x1.cpu().numpy(), total_std_evi.cpu().numpy(), label="total std")
plt.title("Evidential uncertainty decomposition")
plt.legend()
plt.show()
../../_images/b0b761f22014df12dfe642ac7765644a3df343bf89b4ed988e6e11db10878492.png