Skip to content

Metrics Reference

This page documents the metrics sub-package.


plots

medpipe.metrics.plots

Plot functions module.

This module provides functions to plot results.

Functions: - plot_metrics_CI: Plots the metrics with confidence intervals for each fold. - plot_prediction_distribution: Plots the prediction probabilities. - plot_reliability_diagrams: Plots the reliability diagrams.

plot_metrics_CI(ci_dict, label_list, save_path='', extension='.png', show_fig=True, **kwargs)

Plots the metrics with confidence intervals for each fold.

Parameters:

Name Type Description Default
ci_dict dict[str, list[tuple(float, float, float)]]

Dictionary containing the metric value and confidence intervals. The keys are the name of the metrics and the values are a list of tuple with first element the metric value, second the lower bound, and third the upper bound. One list element per model.

required
label_list list[str]

List of labels for the legend.

required
save_path str

Path to the save file.

''
extension str

Extension to save figure in.

".png"
show_fig bool

Flag to show the figure.

True
**kwargs

Extra arguments for the figure or axes objects.

{}

Returns:

Type Description
None

Nothing is returned.

Source code in src/medpipe/metrics/plots.py
def plot_metrics_CI(
    ci_dict, label_list, save_path="", extension=".png", show_fig=True, **kwargs
):
    """
    Plots the metrics with confidence intervals for each fold.

    Parameters
    ----------
    ci_dict : dict[str, tuple(array, array, array)]
        Dictionary containing the metric values and confidence intervals.
        The keys are the names of the metrics and the values are a tuple
        with first element the metric values, second the lower bounds, and
        third the upper bounds (one array entry per model), as returned by
        compute_CI.
    label_list : list[str]
        List of labels for the legend.
    save_path : str, default: ""
        Path prefix for the save file. If empty, figures are not saved.
    extension : str, default: ".png"
        Extension to save figure in.
    show_fig : bool, default: True
        Flag to show the figure.
    **kwargs
        Extra arguments for the figure or axes objects.

    Returns
    -------
    None
        Nothing is returned.

    """
    # Split arguments based on where they should be sent
    ax_kwargs = {key: value for key, value in kwargs.items() if key in dir(Axes)}
    fig_kwargs = {key: value for key, value in kwargs.items() if key in dir(Figure)}

    # Set up some variables
    colours = [
        "#2D90D8",
        "#33367A",
        "#96690E",
        "#CDB4DB",
        "#F2CC8F",
    ]
    y_labels = {
        "auroc": "AUROC",
        "ap": "AUPRC",
        "log_loss": "Log loss",
        "accuracy": "Accuracy",
        "recall": "Recall",
        "precision": "Precision",
        "f1": "F1",
    }
    bar_width = 0.3
    x = np.arange(len(label_list)) * bar_width

    # Loop through each metric
    for key, values in ci_dict.items():
        # Set up the figure and axis
        fig, ax = plt.subplots(**fig_kwargs)  # One figure per metric

        for j in range(len(values[0])):
            value = values[0][j]  # Mean metric value for model j
            lower_b = values[1][j]  # Lower CI bound for model j

            ax.bar(
                x[j],
                value,
                width=bar_width,
                color=colours[j],
                edgecolor=(0, 0, 0, 1),
                label=label_list[j],
            )

            # Symmetric error bar derived from the lower bound
            ax.errorbar(
                x[j],
                value,
                yerr=value - lower_b,
                fmt="none",
                color="black",
                capsize=5,
            )

        # Customize the chart
        ax.set_ylabel(y_labels[key], fontweight="bold")
        if key != "log_loss":
            # All metrics except log loss are bounded in [0, 1]
            ax.set_ylim([0, 1.05])
        ax.spines["top"].set_visible(False)
        ax.spines["right"].set_visible(False)

        # Remove x ticks
        ax.set_xticks([])
        ax.set_xticklabels([])

        # Place legend for the figure
        fig.legend(loc="center right", title="Models")

        # Set ax_kwargs to override if needed.
        # BUG FIX: use dedicated loop variables here. The original reused
        # `key`, shadowing the metric name, so the save path below was built
        # from the last axes kwarg name whenever ax_kwargs was non-empty.
        for ax_key, ax_val in ax_kwargs.items():
            getattr(ax, ax_key)(ax_val)

        plt.tight_layout()
        fig.subplots_adjust(right=0.7, bottom=0.14)
        if save_path:
            save_file = save_path + key + extension
            file_checks(save_file, extension=extension, exists=False)
            plt.savefig(save_file)
        if show_fig:
            plt.show()

        plt.close()

plot_prediction_distribution(dist_list, label_list=[], n_bins=10, save_path='', extension='.png', show_fig=True, **kwargs)

Plots the prediction probabilities.

Parameters:

Name Type Description Default
dist_list list[array]

List of the predicted probability distributions.

required
label_list list[str]

List of labels for the legend.

[]
n_bins int

Number of bins for the histogram.

10
save_path str

Path to the save file.

''
extension str

Extension to save figure in.

".png"
show_fig bool

Flag to show the figure.

True
**kwargs

Extra arguments for the figure or axes objects.

{}

Returns:

Type Description
None

Nothing is returned.

Source code in src/medpipe/metrics/plots.py
def plot_prediction_distribution(
    dist_list,
    label_list=[],
    n_bins=10,
    save_path="",
    extension=".png",
    show_fig=True,
    **kwargs,
):
    """
    Plots the prediction probabilities.

    Parameters
    ----------
    dist_list : list[array]
        List of the predicted probability distributions.
    label_list : list[str], default: []
        List of labels for the legend.
    n_bins : int, default: 10
        Number of bins for the histogram.
    save_path : str, default: ""
        Path to the save file. If empty, the figure is not saved.
    extension : str, default: ".png"
        Extension to save figure in.
    show_fig : bool, default: True
        Flag to show the figure.
    **kwargs
        Extra arguments for the figure or axes objects.

    Returns
    -------
    None
        Nothing is returned.

    """
    # Optional figure title supplied through the axes kwargs
    title = kwargs.get("set_title", "")

    # Split arguments based on where they should be sent
    ax_kwargs = {key: value for key, value in kwargs.items() if key in dir(Axes)}
    fig_kwargs = {key: value for key, value in kwargs.items() if key in dir(Figure)}

    # Set up variables
    colour_list = ["#2D90D8", "#33367A", "#96690E", "#CDB4DB", "#F2CC8F"]
    bins = np.linspace(0, 1, n_bins + 1)  # Equal-width bins over [0, 1]

    # Set figure and axes properties
    fig, ax = plt.subplots(**fig_kwargs)  # Create a new figure

    # Set labels and scale
    ax.set_xlabel("Predicted probabilities", fontweight="bold")
    ax.set_ylabel("Count", fontweight="bold")
    ax.set_yscale("log")

    # Set ax_kwargs to override if needed
    for key, val in ax_kwargs.items():
        getattr(ax, key)(val)

    ax.hist(
        dist_list,
        color=colour_list[: len(dist_list)],
        stacked=True,
        edgecolor="black",
        bins=bins,
        label=label_list,
    )
    # Remove spines for aesthetics. Address `ax` directly instead of
    # plt.gca() for consistency with the rest of the function.
    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)

    # Add legend
    ax.legend(loc="upper right", bbox_to_anchor=(1.45, 0.9), title="Models")
    ax.set_title(title)
    ax.set_xlim([-0.05, 1.05])  # Set x limits

    # Adjust layout
    plt.tight_layout()
    fig.subplots_adjust(right=0.7, bottom=0.14)

    if save_path:
        save_file = save_path + extension
        file_checks(save_file, extension=extension, exists=False)
        plt.savefig(save_file)
    if show_fig:
        plt.show()

    plt.close()

plot_reliability_diagrams(y_test, proba_list, label_list=[], save_path='', extension='.png', show_fig=True, display_kwargs={}, **kwargs)

Plots the reliability diagrams for the given probabilities.

Parameters:

Name Type Description Default
y_test array-like of shape (n_samples, n_classes)

Ground truth labels.

required
proba_list list[array]

List of predicted probabilities.

required
label_list list[str]

List of labels for the legend.

[]
save_path str

Path to the save file.

''
extension str

Extension to save figure in.

".png"
show_fig bool

Flag to show the figure.

True
display_kwargs dict[str, value]

Extra arguments for the CalibrationDisplay.

{}
**kwargs

Extra arguments for the figure or axes objects.

{}

Returns:

Type Description
None

Nothing is returned.

Source code in src/medpipe/metrics/plots.py
def plot_reliability_diagrams(
    y_test,
    proba_list,
    label_list=[],
    save_path="",
    extension=".png",
    show_fig=True,
    display_kwargs={},
    **kwargs,
):
    """
    Plots the reliability diagrams for the given probabilities.

    Parameters
    ----------
    y_test : array-like of shape (n_samples, n_classes)
        Ground truth labels.
    proba_list : list[array]
        List of predicted probabilities.
    label_list : list[str], default: []
        List of labels for the legend, one per entry in proba_list.
    save_path : str, default: ""
        Path to the save file. If empty, the figure is not saved.
    extension : str, default: ".png"
        Extension to save figure in.
    show_fig : bool, default: True
        Flag to show the figure.
    display_kwargs : dict[str, value], default: {}
        Extra arguments for the calibration curve computation.
    **kwargs
        Extra arguments for the figure or axes objects.

    Returns
    -------
    None
        Nothing is returned.

    """
    colours = ["#2D90D8", "#33367A", "#96690E", "#CDB4DB", "#F2CC8F"]

    # Split arguments based on where they should be sent
    ax_kwargs = {key: value for key, value in kwargs.items() if key in dir(Axes)}
    fig_kwargs = {key: value for key, value in kwargs.items() if key in dir(Figure)}

    # Set figure and axes properties
    fig, ax = plt.subplots(**fig_kwargs)  # Create a new figure

    # Plot perfect calibration as a diagonal reference line
    ax.plot(
        np.linspace(0, 1, 100),
        np.linspace(0, 1, 100),
        "k--",
        label="Perfectly calibrated",
    )

    # One calibration curve per set of predicted probabilities
    for i in range(len(proba_list)):
        prob_true, prob_pred = calibration_curve(
            y_test,
            proba_list[i],
            **display_kwargs,
        )
        ax.plot(
            prob_pred,
            prob_true,
            marker=".",
            color=colours[i],
            label=label_list[i],
        )

    # Optional figure title supplied through the axes kwargs
    title = kwargs.get("set_title", "")
    ax.set_title(title)
    ax.set_xlabel("Predicted probabilities", fontweight="bold")
    ax.set_ylabel("Observed proportion", fontweight="bold")

    # Set ax_kwargs to override if needed
    for key, val in ax_kwargs.items():
        getattr(ax, key)(val)

    ax.legend(loc="upper right", bbox_to_anchor=(1.6, 0.9))
    plt.tight_layout()
    fig.subplots_adjust(right=0.66, bottom=0.14)

    if save_path:
        save_file = save_path + extension
        file_checks(save_file, extension=extension, exists=False)
        plt.savefig(save_file)
    if show_fig:
        plt.show()

    plt.close()

core

medpipe.metrics.core

Core metric functions module.

This module provides functions to compute and print metrics.

Functions: - print_metrics: prints the numerical metrics. - print_metrics_CI: prints numerical metrics with their confidence intervals. - compute_all_CI: computes the confidence interval for all metrics. - compute_CI: computes the confidence interval. - extract_metric : extracts a metric for each fold. - compute_pred_metrics : computes the metrics that require the prediction labels. - compute_score_metrics : computes the metrics that require the score.

compute_CI(data)

Computes the confidence interval of the data.

The CI is calculated using the Student's t-distribution.

Parameters:

Name Type Description Default
data array-like of shape (n_samples, n_sets)

Data on which to compute the confidence interval.

required

Returns:

Name Type Description
mean_arr np.array(float) of shape (n_sets,)

Mean values.

lower_b_arr np.array(float) of shape (n_sets,)

Lower bound of the confidence intervals.

upper_b_arr np.array(float) of shape (n_sets,)

Upper bound of the confidence intervals.

Raises:

Type Description
TypeError

If data is not array-like

Source code in src/medpipe/metrics/core.py
def compute_CI(data):
    """
    Computes the confidence interval of the data.

    The CI is a two-sided 95% interval calculated using the Student's
    t-distribution with n_samples - 1 degrees of freedom.

    Parameters
    ----------
    data : array-like of shape (n_samples, n_sets)
        Data on which to compute the confidence interval.

    Returns
    -------
    mean_arr : np.array(float) of shape (n_sets,)
        Mean values.
    lower_b_arr : np.array(float) of shape (n_sets,)
        Lower bound of the confidence intervals.
    upper_b_arr : np.array(float) of shape (n_sets,)
        Upper bound of the confidence intervals.

    Raises
    ------
    TypeError
        If data is not array-like

    """
    array_check(data)
    # isinstance is the idiomatic type test (was `type(data) is type([])`)
    if isinstance(data, list):
        # Convert to array if needed
        arr_data = np.array(data)
    else:
        arr_data = data

    if arr_data.ndim == 1:
        # Make sure there are 2 dimensions (samples x sets)
        arr_data = np.expand_dims(arr_data, 1)

    n_sets = arr_data.shape[1]
    mean_arr = np.zeros(n_sets)
    lower_b_arr = np.zeros(n_sets)
    upper_b_arr = np.zeros(n_sets)

    for i in range(n_sets):
        column = arr_data[:, i]
        mean_arr[i] = np.mean(column)
        std_err = sem(column)  # Standard error of the mean

        # 95% two-sided interval, n - 1 degrees of freedom
        lower_b_arr[i], upper_b_arr[i] = t.interval(
            0.95, len(column) - 1, loc=mean_arr[i], scale=std_err
        )

    return mean_arr, lower_b_arr, upper_b_arr

compute_all_CI(model_metrics, metric_list=[], **kwargs)

Computes the confidence intervals for all metrics.

Parameters:

Name Type Description Default
model_metrics dict[int, dict[str, float or tuple(array - like)]]

Model metrics for different folds.

required
metric_list list[str]

List of metrics to calculate confidence interval.

[]
**kwargs

Extra arguments for the compute_CI function.

{}

Returns:

Name Type Description
ci_dict dict[str, tuple(float, float, float)]

Dictionary containing the metric value and confidence intervals. The keys are the name of the metrics and the values are a tuple with first element the metric value, second the lower bound, and third the upper bound.

Source code in src/medpipe/metrics/core.py
def compute_all_CI(model_metrics, metric_list=[], **kwargs):
    """
    Computes the confidence intervals for all metrics.

    Parameters
    ----------
    model_metrics : dict[int, dict[str, float or tuple(array-like)]]
        Model metrics for different folds.
    metric_list : list[str], default: []
        Names of the metrics to compute intervals for. An empty list means
        every available metric.
    **kwargs
        Extra arguments forwarded to compute_CI.

    Returns
    -------
    ci_dict : dict[str, tuple(float, float, float)]
        Maps each metric name to a (value, lower bound, upper bound) tuple.

    """
    # Metric names are taken from the first fold's entry
    first_fold = next(iter(model_metrics.values()))
    metrics = first_fold.keys()

    if metric_list == []:
        # No explicit selection: use every metric
        metric_list = list(metrics)

    curve_metrics = ("roc", "prc")  # Curve data cannot have a scalar CI

    ci_dict = {}
    for metric in metrics:
        if metric in curve_metrics or metric not in metric_list:
            continue
        fold_values = extract_metric(model_metrics, metric)
        ci_dict[metric] = compute_CI(fold_values, **kwargs)

    return ci_dict

compute_pred_metrics(metric_list, y_true, y_pred)

Computes the metrics that require the prediction labels.

Parameters:

Name Type Description Default
metric_list list[str]

List of metrics. Possible values: - accuracy - f1 - precision - recall

required
y_true array-like of shape (n_samples, n_classes)

Ground truth labels.

required
y_pred array-like of shape (n_samples, n_classes)

Predicted labels.

required

Returns:

Name Type Description
metric_dict dict[str, list[float]]

Dictionary of the metrics. The keys are the name of the metric and the values are the computed metric value. If multilabel then the list contains the value for each class and the last value is the average value.

Raises:

Type Description
ValueError

If the metric is not recognised.

Source code in src/medpipe/metrics/core.py
def compute_pred_metrics(metric_list, y_true, y_pred):
    """
    Computes the metrics that require the prediction labels.

    Parameters
    ----------
    metric_list : list[str]
        List of metrics. Possible values:
         - accuracy
         - f1
         - precision
         - recall
    y_true : array-like of shape (n_samples, n_classes)
        Ground truth labels.
    y_pred : array-like of shape (n_samples, n_classes)
        Predicted labels.

    Returns
    -------
    metric_dict : dict[str, list[float]]
        Dictionary of the metrics. The keys are the name of the metric
        and the values are the computed metric value.
        If multilabel then the list contains the value for each class and
        the last value is the average value.

    Raises
    ------
    ValueError
        If the metric is not recognised.

    """
    metric_dict = {}
    multilabel = False
    average = "binary"

    if len(y_true.shape) > 1:
        # Multilabel situation
        multilabel = True
        average = None

    if "accuracy" in metric_list:
        # Deal with accuracy separately to get accuracy for each label
        values = []  # Store values

        if multilabel:
            # Iterate over each label and add individual label accuracy
            for i in range(y_true.shape[1]):
                values.append(skl.metrics.accuracy_score(y_true[:, i], y_pred[:, i]))

        values.append(skl.metrics.accuracy_score(y_true, y_pred))
        metric_dict.update({"accuracy": values})
        metric_list.remove("accuracy")

    for metric in metric_list:
        values = []  # Create empty list to hold the metrics for each label
        match metric:
            case "f1":
                values.append(skl.metrics.f1_score(y_true, y_pred, average=average))
                if multilabel:
                    values = np.append(
                        values,
                        skl.metrics.f1_score(y_true, y_pred, average="weighted"),
                    )
                metric_dict.update({metric: values})

            case "precision":
                values.append(
                    skl.metrics.precision_score(
                        y_true, y_pred, average=average, zero_division=0.0
                    )
                )
                if multilabel:
                    values = np.append(
                        values,
                        skl.metrics.precision_score(
                            y_true, y_pred, average="weighted", zero_division=0.0
                        ),
                    )
                metric_dict.update({metric: values})

            case "recall":
                values.append(skl.metrics.recall_score(y_true, y_pred, average=average))
                if multilabel:
                    values = np.append(
                        values,
                        skl.metrics.recall_score(y_true, y_pred, average="weighted"),
                    )
                metric_dict.update({metric: values})

            case _:
                raise ValueError(f"{metric} is an unrecognised metric")

    return metric_dict

compute_score_metrics(metric_list, y_true, y_pred_proba)

Computes the metrics that require the score.

Parameters:

Name Type Description Default
metric_list list[str]

List of metrics. Possible values: - roc - auroc (area under the curve) - prc (precision-recall curve) - ap (average precision) - log_loss

required
y_true array-like of shape (n_samples, n_classes)

Ground truth labels.

required
y_pred_proba array or list[array]

Predicted scores.

required

Returns:

Name Type Description
metric_dict dict[str, list[float or tuple]]

Dictionary of the metrics. The keys are the name of the metric and the values are the computed metric values. If multilabel then the list contains the value for each class.

Raises:

Type Description
ValueError

If the metric is not recognised.

Source code in src/medpipe/metrics/core.py
def compute_score_metrics(metric_list, y_true, y_pred_proba):
    """
    Computes the metrics that require the score.

    Parameters
    ----------
    metric_list : list[str]
        List of metrics. Possible values:
         - roc
         - auroc (area under the curve)
         - prc (precision-recall curve)
         - ap (average precision)
         - log_loss
    y_true : array-like of shape (n_samples, n_classes)
        Ground truth labels.
    y_pred_proba : np.array or list[np.array]
        Predicted scores.

    Returns
    -------
    metric_dict : dict[str, list[float or tuple]]
        Dictionary of the metrics. The keys are the name of the metric
        and the values are the computed metric values.
        If multilabel then the list contains the value for each class.

    Raises
    ------
    ValueError
        If the metric is not recognised.

    """
    metric_dict = {}
    multilabel = True

    if len(y_true.shape) == 1:
        # Make into a list
        y_true = np.expand_dims(y_true, 1)
        if len(y_pred_proba.shape) == 2:
            y_pred_proba = np.expand_dims(y_pred_proba, 0)
        multilabel = False

    for metric in metric_list:
        values = []  # Create empty list to hold the metrics for each label
        for i, scores in enumerate(y_pred_proba):
            match metric:
                case "roc":
                    values.append(skl.metrics.roc_curve(y_true[:, i], scores[:, 1]))
                case "auroc":
                    values.append(skl.metrics.roc_auc_score(y_true[:, i], scores[:, 1]))
                case "prc":
                    values.append(
                        skl.metrics.precision_recall_curve(y_true[:, i], scores[:, 1])
                    )
                case "ap":
                    values.append(
                        skl.metrics.average_precision_score(y_true[:, i], scores[:, 1])
                    )
                case "log_loss":
                    values.append(skl.metrics.log_loss(y_true[:, i], scores[:, 1]))
                case _:
                    raise ValueError(f"{metric} is an unrecognised metric")

            metric_dict.update({metric: values})

        if multilabel:
            if metric == "ap" or metric == "auroc" or metric == "log_loss":
                # Add the average log loss, AUROC, and AP score
                metric_dict[metric].append(np.mean(metric_dict[metric]))
    return metric_dict

extract_metric(model_metrics, metric_name)

Extracts the desired metric from each fold in the metric dictionary.

Parameters:

Name Type Description Default
model_metrics dict[int, dict[str, float or tuple(array - like)]]

Model metrics for different folds.

required
metric_name str

Name of the metric to extract.

required

Returns:

Name Type Description
metric_list list[float]

List containing the metric values for each fold.

Source code in src/medpipe/metrics/core.py
def extract_metric(model_metrics, metric_name):
    """
    Extracts the desired metric from each fold in the metric dictionary.

    Parameters
    ----------
    model_metrics : dict[int, dict[str, float or tuple(array-like)]]
        Model metrics for different folds.
    metric_name : str
        Name of the metric to extract.

    Returns
    -------
    metric_list : list[float]
        List containing the metric values for each fold.

    """
    # Pull the named metric out of every fold's metric dictionary
    return [fold_metrics[metric_name] for fold_metrics in model_metrics.values()]

print_metrics(metric_dict, label_list, logger=None)

Prints the metrics on the terminal.

Parameters:

Name Type Description Default
metric_dict dict[str, float or tuple(array - like)]

Dictionary of the model performance for one fold. Keys are the metric name and values are the metric value. The test metrics used are: - accuracy - f1 - precision - recall - log_loss - auroc (Area Under Receiver Operator Characteristic) - ap (Average Precision)

required
label_list list[str]

List of predicted labels.

required
logger Logger

Logger object to log prints. If None print to terminal.

None

Returns:

Type Description
None

Nothing is returned.

Source code in src/medpipe/metrics/core.py
def print_metrics(metric_dict, label_list, logger=None) -> None:
    """
    Prints the metrics on the terminal.

    Parameters
    ----------
    metric_dict : dict[str, float or tuple(array-like)]
        Dictionary of the model performance for one fold.
        Keys are the metric name and values are the metric value.
        The test metrics used are:
         - accuracy
         - f1
         - precision
         - recall
         - log_loss
         - auroc (Area Under Receiver Operator Characteristic)
         - ap (Average Precision)
    label_list : list[str]
        List of predicted labels.
    logger : logging.Logger, default: None
        Logger object to log prints. If None print to terminal.

    Returns
    -------
    None
        Nothing is returned.

    """
    # Display name and dictionary key for each metric, in print order.
    # A data-driven loop replaces seven copy-pasted print_message calls
    # (and avoids nested same-quote f-strings, which need Python >= 3.12).
    display_order = [
        ("Accuracy", "accuracy"),
        ("F1", "f1"),
        ("Precision", "precision"),
        ("Recall", "recall"),
        ("Log loss", "log_loss"),
        ("AUROC", "auroc"),
        ("AP", "ap"),
    ]

    for i in range(len(label_list)):
        # One block of metrics per label
        print_message(f"  {label_list[i]} metrics:", logger, SCRIPT_NAME)

        for display_name, metric_key in display_order:
            print_message(
                f"    {display_name}: {metric_dict[metric_key][i]:.3f}",
                logger,
                SCRIPT_NAME,
            )

print_metrics_CI(ci_dict, label_list, logger=None)

Prints the metrics with their confidence intervals.

Parameters:

Name Type Description Default
ci_dict dict[str, tuple(float, float, float)]

Dictionary containing the metric value and confidence intervals. The keys are the name of the metrics and the values are a tuple with first element the metric value, second the lower bound, and third the upper bound.

required
logger Logger

Logger object to log prints. If None print to terminal.

None

Returns:

Type Description
None

Nothing is returned.

Source code in src/medpipe/metrics/core.py
def print_metrics_CI(ci_dict, label_list, logger=None):
    """
    Prints the metrics with their confidence intervals.

    Parameters
    ----------
    ci_dict : dict[str, tuple(float, float, float)]
        Dictionary containing the metric value and confidence intervals.
        The keys are the name of the metrics and the values are a tuple with
        first element the metric value, second the lower bound, and third the
        upper bound.
    label_list : list[str]
        List of labels; one block of metrics is printed per label.
    logger : logging.Logger, default: None
        Logger object to log prints. If None print to terminal.

    Returns
    -------
    None
        Nothing is returned.

    """
    # One block of output per label. The original guarded on
    # `i < len(label_list)` with a "Global metrics" fallback, but that branch
    # was unreachable since i always ranged over range(len(label_list)).
    for i, label in enumerate(label_list):
        print_message(f"  {label} metrics:", logger, SCRIPT_NAME)

        for metric, (stat, lb, ub) in ci_dict.items():
            print_message(
                f"    {metric.capitalize()}: {stat[i]:.3f} CI [{lb[i]:.3f}, {ub[i]:.3f}]",
                logger,
                SCRIPT_NAME,
            )