Skip to content

Metrics Reference

This page documents the metrics sub-package.


plots

medpipe.metrics.plots

Plot functions module.

This module provides functions to plot results.

Functions: - plot_metrics_CI: Plots the metrics with confidence intervals for each fold. - plot_prediction_distribution: Plots the prediction probabilities. - plot_reliability_diagrams: Plots the reliability diagrams.

plot_metrics_CI(ci_dict, label_list, save_path='', extension='.png', show_fig=True, **kwargs)

Plots the metrics with confidence intervals for each fold.

Parameters:

Name Type Description Default
ci_dict dict[str, list[tuple(float, float, float)]]

Dictionary containing the metric value and confidence intervals. The keys are the name of the metrics and the values are a list of tuple with first element the metric value, second the lower bound, and third the upper bound. One list element per model.

required
label_list list[str]

List of labels for the legend.

required
save_path str

Path to the save file.

''
extension str

Extension to save figure in.

".png"
show_fig bool

Flag to show the figure.

True
**kwargs

Extra arguments for the figure or axes objects.

{}

Returns:

Type Description
None

Nothing is returned.

Source code in src/medpipe/metrics/plots.py
def plot_metrics_CI(
    ci_dict, label_list, save_path="", extension=".png", show_fig=True, **kwargs
):
    """
    Plots the metrics with confidence intervals for each fold.

    Parameters
    ----------
    ci_dict : dict[str, tuple(array, array, array)]
        Dictionary containing the metric values and confidence intervals.
        The keys are the names of the metrics and the values are a tuple
        with first element the metric values, second the lower bounds, and
        third the upper bounds (one array entry per model), as returned by
        compute_CI.
    label_list : list[str]
        List of labels for the legend.
    save_path : str, default: ""
        Path prefix for the save file. If empty, figures are not saved.
    extension : str, default: ".png"
        Extension to save figure in.
    show_fig : bool, default: True
        Flag to show the figure.
    **kwargs
        Extra arguments for the figure or axes objects.

    Returns
    -------
    None
        Nothing is returned.

    """
    # Split arguments based on where they should be sent
    ax_kwargs = {key: value for key, value in kwargs.items() if key in dir(Axes)}
    fig_kwargs = {key: value for key, value in kwargs.items() if key in dir(Figure)}

    # Set up some variables
    colours = [
        "#2D90D8",
        "#33367A",
        "#96690E",
        "#CDB4DB",
        "#F2CC8F",
    ]
    y_labels = {
        "auroc": "AUROC",
        "ap": "AUPRC",
        "log_loss": "Log loss",
        "accuracy": "Accuracy",
        "recall": "Recall",
        "precision": "Precision",
        "f1": "F1",
    }
    bar_width = 0.3
    x = np.arange(len(label_list)) * bar_width

    # Loop through each metric
    for key, values in ci_dict.items():
        # Set up the figure and axis
        fig, ax = plt.subplots(**fig_kwargs)  # One figure per metric

        for j in range(len(values[0])):
            value = values[0][j]  # Mean metric value for model j
            lower_b = values[1][j]  # Lower CI bound for model j

            ax.bar(
                x[j],
                value,
                width=bar_width,
                color=colours[j],
                edgecolor=(0, 0, 0, 1),
                label=label_list[j],
            )

            # Symmetric error bar derived from the lower bound
            ax.errorbar(
                x[j],
                value,
                yerr=value - lower_b,
                fmt="none",
                color="black",
                capsize=5,
            )

        # Customize the chart
        ax.set_ylabel(y_labels[key], fontweight="bold")
        if key != "log_loss":
            # All metrics except log loss are bounded in [0, 1]
            ax.set_ylim([0, 1.05])
        ax.spines["top"].set_visible(False)
        ax.spines["right"].set_visible(False)

        # Remove x ticks
        ax.set_xticks([])
        ax.set_xticklabels([])

        # Place legend for the figure
        fig.legend(loc="center right", title="Models")

        # Set ax_kwargs to override if needed.
        # BUG FIX: use dedicated loop variables here. The original reused
        # `key`, shadowing the metric name, so the save path below was built
        # from the last axes kwarg name whenever ax_kwargs was non-empty.
        for ax_key, ax_val in ax_kwargs.items():
            getattr(ax, ax_key)(ax_val)

        plt.tight_layout()
        fig.subplots_adjust(right=0.7, bottom=0.14)
        if save_path:
            save_file = save_path + key + extension
            file_checks(save_file, extension=extension, exists=False)
            plt.savefig(save_file)
        if show_fig:
            plt.show()

        plt.close()

plot_prediction_distribution(dist_list, label_list=[], n_bins=10, save_path='', extension='.png', show_fig=True, **kwargs)

Plots the prediction probabilities.

Parameters:

Name Type Description Default
dist_list list[array]

List of the predicted probability distributions.

required
label_list list[str]

List of labels for the legend.

[]
n_bins int

Number of bins for the histogram.

10
save_path str

Path to the save file.

''
extension str

Extension to save figure in.

".png"
show_fig bool

Flag to show the figure.

True
**kwargs

Extra arguments for the figure or axes objects.

{}

Returns:

Type Description
None

Nothing is returned.

Source code in src/medpipe/metrics/plots.py
def plot_prediction_distribution(
    dist_list,
    label_list=[],
    n_bins=10,
    save_path="",
    extension=".png",
    show_fig=True,
    **kwargs,
):
    """
    Plots the prediction probabilities.

    Parameters
    ----------
    dist_list : list[array]
        List of the predicted probability distributions.
    label_list : list[str], default: []
        List of labels for the legend.
    n_bins : int, default: 10
        Number of bins for the histogram.
    save_path : str, default: ""
        Path to the save file. If empty, the figure is not saved.
    extension : str, default: ".png"
        Extension to save figure in.
    show_fig : bool, default: True
        Flag to show the figure.
    **kwargs
        Extra arguments for the figure or axes objects.

    Returns
    -------
    None
        Nothing is returned.

    """
    # Optional figure title supplied through the axes kwargs
    title = kwargs.get("set_title", "")

    # Split arguments based on where they should be sent
    ax_kwargs = {key: value for key, value in kwargs.items() if key in dir(Axes)}
    fig_kwargs = {key: value for key, value in kwargs.items() if key in dir(Figure)}

    # Set up variables
    colour_list = ["#2D90D8", "#33367A", "#96690E", "#CDB4DB", "#F2CC8F"]
    bins = np.linspace(0, 1, n_bins + 1)  # Equal-width bins over [0, 1]

    # Set figure and axes properties
    fig, ax = plt.subplots(**fig_kwargs)  # Create a new figure

    # Set labels and scale
    ax.set_xlabel("Predicted probabilities", fontweight="bold")
    ax.set_ylabel("Count", fontweight="bold")
    ax.set_yscale("log")

    # Set ax_kwargs to override if needed
    for key, val in ax_kwargs.items():
        getattr(ax, key)(val)

    ax.hist(
        dist_list,
        color=colour_list[: len(dist_list)],
        stacked=True,
        edgecolor="black",
        bins=bins,
        label=label_list,
    )
    # Remove spines for aesthetics. Address `ax` directly instead of
    # plt.gca() for consistency with the rest of the function.
    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)

    # Add legend
    ax.legend(loc="upper right", bbox_to_anchor=(1.45, 0.9), title="Models")
    ax.set_title(title)
    ax.set_xlim([-0.05, 1.05])  # Set x limits

    # Adjust layout
    plt.tight_layout()
    fig.subplots_adjust(right=0.7, bottom=0.14)

    if save_path:
        save_file = save_path + extension
        file_checks(save_file, extension=extension, exists=False)
        plt.savefig(save_file)
    if show_fig:
        plt.show()

    plt.close()

plot_reliability_diagrams(y_test, proba_list, label_list=[], save_path='', extension='.png', show_fig=True, display_kwargs={}, **kwargs)

Plots the reliability diagrams for the given probabilities.

Parameters:

Name Type Description Default
y_test array-like of shape (n_samples, n_classes)

Ground truth labels.

required
proba_list list[array]

List of predicted probabilities.

required
label_list list[str]

List of labels for the legend.

[]
save_path str

Path to the save file.

''
extension str

Extension to save figure in.

".png"
show_fig bool

Flag to show the figure.

True
display_kwargs dict[str, value]

Extra arguments for the CalibrationDisplay.

{}
**kwargs

Extra arguments for the figure or axes objects.

{}

Returns:

Type Description
None

Nothing is returned.

Source code in src/medpipe/metrics/plots.py
def plot_reliability_diagrams(
    y_test,
    proba_list,
    label_list=[],
    save_path="",
    extension=".png",
    show_fig=True,
    display_kwargs={},
    **kwargs,
):
    """
    Plots the reliability diagrams for the given probabilities.

    Parameters
    ----------
    y_test : array-like of shape (n_samples, n_classes)
        Ground truth labels.
    proba_list : list[array]
        List of predicted probabilities.
    label_list : list[str], default: []
        List of labels for the legend, one per entry in proba_list.
    save_path : str, default: ""
        Path to the save file. If empty, the figure is not saved.
    extension : str, default: ".png"
        Extension to save figure in.
    show_fig : bool, default: True
        Flag to show the figure.
    display_kwargs : dict[str, value], default: {}
        Extra arguments for the calibration curve computation.
    **kwargs
        Extra arguments for the figure or axes objects.

    Returns
    -------
    None
        Nothing is returned.

    """
    colours = ["#2D90D8", "#33367A", "#96690E", "#CDB4DB", "#F2CC8F"]

    # Split arguments based on where they should be sent
    ax_kwargs = {key: value for key, value in kwargs.items() if key in dir(Axes)}
    fig_kwargs = {key: value for key, value in kwargs.items() if key in dir(Figure)}

    # Set figure and axes properties
    fig, ax = plt.subplots(**fig_kwargs)  # Create a new figure

    # Plot perfect calibration as a diagonal reference line
    ax.plot(
        np.linspace(0, 1, 100),
        np.linspace(0, 1, 100),
        "k--",
        label="Perfectly calibrated",
    )

    # One calibration curve per set of predicted probabilities
    for i in range(len(proba_list)):
        prob_true, prob_pred = calibration_curve(
            y_test,
            proba_list[i],
            **display_kwargs,
        )
        ax.plot(
            prob_pred,
            prob_true,
            marker=".",
            color=colours[i],
            label=label_list[i],
        )

    # Optional figure title supplied through the axes kwargs
    title = kwargs.get("set_title", "")
    ax.set_title(title)
    ax.set_xlabel("Predicted probabilities", fontweight="bold")
    ax.set_ylabel("Observed proportion", fontweight="bold")

    # Set ax_kwargs to override if needed
    for key, val in ax_kwargs.items():
        getattr(ax, key)(val)

    ax.legend(loc="upper right", bbox_to_anchor=(1.6, 0.9))
    plt.tight_layout()
    fig.subplots_adjust(right=0.66, bottom=0.14)

    if save_path:
        save_file = save_path + extension
        file_checks(save_file, extension=extension, exists=False)
        plt.savefig(save_file)
    if show_fig:
        plt.show()

    plt.close()

core

medpipe.metrics.core

Core metric functions module.

This module provides functions to compute and print metrics.

Functions: - print_metrics: prints the numerical metrics. - print_metrics_CI: prints numerical metrics with their confidence intervals. - compute_all_CI: computes the confidence interval for all metrics. - compute_CI: computes the confidence interval. - extract_metric : extracts a metric for each fold. - compute_pred_metrics : computes the metrics that require the prediction labels. - compute_score_metrics : computes the metrics that require the score.

compute_CI(data)

Computes the confidence interval of the data.

The CI is calculated using the Student's t-distribution.

Parameters:

Name Type Description Default
data array-like of shape (n_samples, n_sets)

Data on which to compute the confidence interval.

required

Returns:

Name Type Description
mean_arr np.array(float) of shape (n_sets,)

Mean values.

lower_b_arr np.array(float) of shape (n_sets,)

Lower bound of the confidence intervals.

upper_b_arr np.array(float) of shape (n_sets,)

Upper bound of the confidence intervals.

Raises:

Type Description
TypeError

If data is not array-like

Source code in src/medpipe/metrics/core.py
def compute_CI(data):
    """
    Computes the confidence interval of the data.

    The CI is a two-sided 95% interval calculated using the Student's
    t-distribution with n_samples - 1 degrees of freedom.

    Parameters
    ----------
    data : array-like of shape (n_samples, n_sets)
        Data on which to compute the confidence interval.

    Returns
    -------
    mean_arr : np.array(float) of shape (n_sets,)
        Mean values.
    lower_b_arr : np.array(float) of shape (n_sets,)
        Lower bound of the confidence intervals.
    upper_b_arr : np.array(float) of shape (n_sets,)
        Upper bound of the confidence intervals.

    Raises
    ------
    TypeError
        If data is not array-like

    """
    array_check(data)
    # isinstance is the idiomatic type test (was `type(data) is type([])`)
    if isinstance(data, list):
        # Convert to array if needed
        arr_data = np.array(data)
    else:
        arr_data = data

    if arr_data.ndim == 1:
        # Make sure there are 2 dimensions (samples x sets)
        arr_data = np.expand_dims(arr_data, 1)

    n_sets = arr_data.shape[1]
    mean_arr = np.zeros(n_sets)
    lower_b_arr = np.zeros(n_sets)
    upper_b_arr = np.zeros(n_sets)

    for i in range(n_sets):
        column = arr_data[:, i]
        mean_arr[i] = np.mean(column)
        std_err = sem(column)  # Standard error of the mean

        # 95% two-sided interval, n - 1 degrees of freedom
        lower_b_arr[i], upper_b_arr[i] = t.interval(
            0.95, len(column) - 1, loc=mean_arr[i], scale=std_err
        )

    return mean_arr, lower_b_arr, upper_b_arr

compute_all_CI(model_metrics, metric_list=[], **kwargs)

Computes the confidence intervals for all metrics.

Parameters:

Name Type Description Default
model_metrics dict[int, dict[str, float or tuple(array - like)]]

Model metrics for different folds.

required
metric_list list[str]

List of metrics to calculate confidence interval.

[]
**kwargs

Extra arguments for the compute_CI function.

{}

Returns:

Name Type Description
ci_dict dict[str, tuple(float, float, float)]

Dictionary containing the metric value and confidence intervals. The keys are the name of the metrics and the values are a tuple with first element the metric value, second the lower bound, and third the upper bound.

Source code in src/medpipe/metrics/core.py
def compute_all_CI(model_metrics, metric_list=[], **kwargs):
    """
    Computes the confidence intervals for all metrics.

    Parameters
    ----------
    model_metrics : dict[int, dict[str, float or tuple(array-like)]]
        Model metrics for different folds.
    metric_list : list[str], default: []
        Names of the metrics to compute intervals for. An empty list means
        every available metric.
    **kwargs
        Extra arguments forwarded to compute_CI.

    Returns
    -------
    ci_dict : dict[str, tuple(float, float, float)]
        Maps each metric name to a (value, lower bound, upper bound) tuple.

    """
    # Metric names are taken from the first fold's entry
    first_fold = next(iter(model_metrics.values()))
    metrics = first_fold.keys()

    if metric_list == []:
        # No explicit selection: use every metric
        metric_list = list(metrics)

    curve_metrics = ("roc", "prc")  # Curve data cannot have a scalar CI

    ci_dict = {}
    for metric in metrics:
        if metric in curve_metrics or metric not in metric_list:
            continue
        fold_values = extract_metric(model_metrics, metric)
        ci_dict[metric] = compute_CI(fold_values, **kwargs)

    return ci_dict

compute_pred_metrics(metric_list, y_true, y_pred)

Computes the metrics that require the prediction labels.

Parameters:

Name Type Description Default
metric_list list[str]

List of metrics. Possible values: - accuracy - f1 - precision - recall

required
y_true array-like of shape (n_samples, n_classes)

Ground truth labels.

required
y_pred array-like of shape (n_samples, n_classes)

Predicted labels.

required

Returns:

Name Type Description
metric_dict dict[str, list[float]]

Dictionary of the metrics. The keys are the name of the metric and the values are the computed metric value. If multilabel then the list contains the value for each class and the last value is the average value.

Raises:

Type Description
ValueError

If the metric is not recognised.

Source code in src/medpipe/metrics/core.py
def compute_pred_metrics(metric_list, y_true, y_pred):
    """
    Computes the metrics that require the prediction labels.

    Parameters
    ----------
    metric_list : list[str]
        List of metrics. Possible values:
         - accuracy
         - f1
         - precision
         - recall
    y_true : array-like of shape (n_samples, n_classes)
        Ground truth labels.
    y_pred : array-like of shape (n_samples, n_classes)
        Predicted labels.

    Returns
    -------
    metric_dict : dict[str, list[float]]
        Dictionary of the metrics. The keys are the name of the metric
        and the values are the computed metric value.
        If multilabel then the list contains the value for each class and
        the last value is the average value.

    Raises
    ------
    ValueError
        If the metric is not recognised.

    """
    metric_dict = {}
    multilabel = False
    average = "binary"

    if len(y_true.shape) > 1:
        # Multilabel situation
        multilabel = True
        average = None

    if "accuracy" in metric_list:
        # Deal with accuracy separately to get accuracy for each label
        values = []  # Store values

        if multilabel:
            # Iterate over each label and add individual label accuracy
            for i in range(y_true.shape[1]):
                values.append(skl.metrics.accuracy_score(y_true[:, i], y_pred[:, i]))

        values.append(skl.metrics.accuracy_score(y_true, y_pred))
        metric_dict.update({"accuracy": values})
        metric_list.remove("accuracy")

    for metric in metric_list:
        values = []  # Create empty list to hold the metrics for each label
        match metric:
            case "f1":
                values.append(skl.metrics.f1_score(y_true, y_pred, average=average))
                if multilabel:
                    values = np.append(
                        values,
                        skl.metrics.f1_score(y_true, y_pred, average="weighted"),
                    )
                metric_dict.update({metric: values})

            case "precision":
                values.append(
                    skl.metrics.precision_score(
                        y_true, y_pred, average=average, zero_division=0.0
                    )
                )
                if multilabel:
                    values = np.append(
                        values,
                        skl.metrics.precision_score(
                            y_true, y_pred, average="weighted", zero_division=0.0
                        ),
                    )
                metric_dict.update({metric: values})

            case "recall":
                values.append(skl.metrics.recall_score(y_true, y_pred, average=average))
                if multilabel:
                    values = np.append(
                        values,
                        skl.metrics.recall_score(y_true, y_pred, average="weighted"),
                    )
                metric_dict.update({metric: values})

            case _:
                raise ValueError(f"{metric} is an unrecognised metric")

    return metric_dict

compute_score_metrics(metric_list, y_true, y_pred_proba)

Computes the metrics that require the score.

Parameters:

Name Type Description Default
metric_list list[str]

List of metrics. Possible values: - roc - auroc (area under the curve) - prc (precision-recall curve) - ap (average precision) - log_loss

required
y_true array-like of shape (n_samples, n_classes)

Ground truth labels.

required
y_pred_proba array or list[array]

Predicted scores.

required

Returns:

Name Type Description
metric_dict dict[str, list[float or tuple]]

Dictionary of the metrics. The keys are the name of the metric and the values are the computed metric values. If multilabel then the list contains the value for each class.

Raises:

Type Description
ValueError

If the metric is not recognised.

Source code in src/medpipe/metrics/core.py
def compute_score_metrics(metric_list, y_true, y_pred_proba):
    """
    Computes the metrics that require the score.

    Parameters
    ----------
    metric_list : list[str]
        List of metrics. Possible values:
         - roc
         - auroc (area under the curve)
         - prc (precision-recall curve)
         - ap (average precision)
         - log_loss
    y_true : array-like of shape (n_samples, n_classes)
        Ground truth labels.
    y_pred_proba : np.array or list[np.array]
        Predicted scores.

    Returns
    -------
    metric_dict : dict[str, list[float or tuple]]
        Dictionary of the metrics. The keys are the name of the metric
        and the values are the computed metric values.
        If multilabel then the list contains the value for each class.

    Raises
    ------
    ValueError
        If the metric is not recognised.

    """
    metric_dict = {}
    multilabel = True

    if len(y_true.shape) == 1:
        # Make into a list
        y_true = np.expand_dims(y_true, 1)
        if len(y_pred_proba.shape) == 2:
            y_pred_proba = np.expand_dims(y_pred_proba, 0)
        multilabel = False

    for metric in metric_list:
        values = []  # Create empty list to hold the metrics for each label
        for i, scores in enumerate(y_pred_proba):
            match metric:
                case "roc":
                    values.append(skl.metrics.roc_curve(y_true[:, i], scores[:, 1]))
                case "auroc":
                    values.append(skl.metrics.roc_auc_score(y_true[:, i], scores[:, 1]))
                case "prc":
                    values.append(
                        skl.metrics.precision_recall_curve(y_true[:, i], scores[:, 1])
                    )
                case "ap":
                    values.append(
                        skl.metrics.average_precision_score(y_true[:, i], scores[:, 1])
                    )
                case "log_loss":
                    values.append(skl.metrics.log_loss(y_true[:, i], scores[:, 1]))
                case _:
                    raise ValueError(f"{metric} is an unrecognised metric")

            metric_dict.update({metric: values})

        if multilabel:
            if metric == "ap" or metric == "auroc" or metric == "log_loss":
                # Add the average log loss, AUROC, and AP score
                metric_dict[metric].append(np.mean(metric_dict[metric]))
    return metric_dict

extract_metric(model_metrics, metric_name)

Extracts the desired metric from each fold in the metric dictionary.

Parameters:

Name Type Description Default
model_metrics dict[int, dict[str, float or tuple(array - like)]]

Model metrics for different folds.

required
metric_name str

Name of the metric to extract.

required

Returns:

Name Type Description
metric_list list[float]

List containing the metric values for each fold.

Source code in src/medpipe/metrics/core.py
def extract_metric(model_metrics, metric_name):
    """
    Extracts the desired metric from each fold in the metric dictionary.

    Parameters
    ----------
    model_metrics : dict[int, dict[str, float or tuple(array-like)]]
        Model metrics for different folds.
    metric_name : str
        Name of the metric to extract.

    Returns
    -------
    metric_list : list[float]
        List containing the metric values for each fold.

    """
    # Pull the named metric out of every fold's metric dictionary
    return [fold_metrics[metric_name] for fold_metrics in model_metrics.values()]

print_metrics(metric_dict, label_list, logger=None)

Prints the metrics on the terminal.

Parameters:

Name Type Description Default
metric_dict dict[str, float or tuple(array - like)]

Dictionary of the model performance for one fold. Keys are the metric name and values are the metric value. The test metrics used are: - accuracy - f1 - precision - recall - log_loss - auroc (Area Under Receiver Operator Characteristic) - ap (Average Precision)

required
label_list list[str]

List of predicted labels.

required
logger Logger

Logger object to log prints. If None print to terminal.

None

Returns:

Type Description
None

Nothing is returned.

Source code in src/medpipe/metrics/core.py
def print_metrics(metric_dict, label_list, logger=None) -> None:
    """
    Prints the metrics on the terminal.

    Parameters
    ----------
    metric_dict : dict[str, float or tuple(array-like)]
        Dictionary of the model performance for one fold.
        Keys are the metric name and values are the metric value.
        The test metrics used are:
         - accuracy
         - f1
         - precision
         - recall
         - log_loss
         - auroc (Area Under Receiver Operator Characteristic)
         - ap (Average Precision)
    label_list : list[str]
        List of predicted labels.
    logger : logging.Logger, default: None
        Logger object to log prints. If None print to terminal.

    Returns
    -------
    None
        Nothing is returned.

    """
    # Display name and dictionary key for each metric, in print order.
    # A data-driven loop replaces seven copy-pasted print_message calls
    # (and avoids nested same-quote f-strings, which need Python >= 3.12).
    display_order = [
        ("Accuracy", "accuracy"),
        ("F1", "f1"),
        ("Precision", "precision"),
        ("Recall", "recall"),
        ("Log loss", "log_loss"),
        ("AUROC", "auroc"),
        ("AP", "ap"),
    ]

    for i in range(len(label_list)):
        # One block of metrics per label
        print_message(f"  {label_list[i]} metrics:", logger, SCRIPT_NAME)

        for display_name, metric_key in display_order:
            print_message(
                f"    {display_name}: {metric_dict[metric_key][i]:.3f}",
                logger,
                SCRIPT_NAME,
            )

print_metrics_CI(ci_dict, label_list, logger=None)

Prints the metrics with their confidence intervals.

Parameters:

Name Type Description Default
ci_dict dict[str, tuple(float, float, float)]

Dictionary containing the metric value and confidence intervals. The keys are the name of the metrics and the values are a tuple with first element the metric value, second the lower bound, and third the upper bound.

required
logger Logger

Logger object to log prints. If None print to terminal.

None

Returns:

Type Description
None

Nothing is returned.

Source code in src/medpipe/metrics/core.py
def print_metrics_CI(ci_dict, label_list, logger=None):
    """
    Prints the metrics with their confidence intervals.

    Parameters
    ----------
    ci_dict : dict[str, tuple(float, float, float)]
        Dictionary containing the metric value and confidence intervals.
        The keys are the name of the metrics and the values are a tuple with
        first element the metric value, second the lower bound, and third the
        upper bound.
    label_list : list[str]
        List of labels; one block of metrics is printed per label.
    logger : logging.Logger, default: None
        Logger object to log prints. If None print to terminal.

    Returns
    -------
    None
        Nothing is returned.

    """
    # One block of output per label. The original guarded on
    # `i < len(label_list)` with a "Global metrics" fallback, but that branch
    # was unreachable since i always ranged over range(len(label_list)).
    for i, label in enumerate(label_list):
        print_message(f"  {label} metrics:", logger, SCRIPT_NAME)

        for metric, (stat, lb, ub) in ci_dict.items():
            print_message(
                f"    {metric.capitalize()}: {stat[i]:.3f} CI [{lb[i]:.3f}, {ub[i]:.3f}]",
                logger,
                SCRIPT_NAME,
            )