import sys

from medpipe import (
    compute_all_CI,
    compute_score_metrics,
    exception_handler,
    extract_labels,
    get_full_proba,
    load_data_from_csv,
    load_pipeline,
    plot_metrics_CI,
    print_message,
    read_toml_configuration,
    setup_logger,
)
from medpipe.utils.config import get_configuration, get_file_path, split_version_number
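
# Evaluate a previously fitted medpipe pipeline: load configuration and data,
# restore the saved model, rebuild the test split, then compute and plot score
# metrics (unadjusted vs. recalibrated) with confidence intervals per group.
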
if __name__ == "__main__":
try:
print_message("Loading parameters from configuration file")
# Read log and general configuration file
log_config = read_toml_configuration("log_config.toml")
general_config = read_toml_configuration("config_file.toml")
print_message("Setting up logger")
# Create logger
script_name = "load_pipeline.py"
        log_dir = log_config["base_dir"] + log_config["log_dir"]
logger = setup_logger(script_name, log_dir)
        print_message(
            f"Version number: {general_config['version']}", logger, script_name
        )
except (TypeError, ValueError, FileNotFoundError, IsADirectoryError) as err:
        sys.stderr.write("An error occurred when trying to create the logger\n")
        sys.stderr.write(repr(err) + "\n")
        sys.exit(1)
try:
data_version, _ = split_version_number(general_config["version"])
# Get data configuration parameters and load the data
data_config = get_configuration(
general_config["data_parameters"],
data_version,
)
        data = load_data_from_csv(
            get_file_path(  # Get data path based on config parameters
                data_config, v_number=data_version[:4]  # Use only the first two version numbers
            )
        )
# Load model
print_message("Loading model", logger, script_name)
load_file = get_file_path(
general_config,
v_number=general_config["version"],
)
pipeline = load_pipeline(load_file)
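        # The restored pipeline bundles the fitted preprocessor, the label list,
        # and the per-group predictor/calibrator probabilities that the metric
        # loop below reads back.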
# Transform the loaded data based on fitted operations
data = pipeline.preprocessor.transform(data)
# Get the test set from the data
print_message("Preparing test set", logger, script_name)
        _, X_test = pipeline.get_test_data(data)
        X_test, y_test = extract_labels(X_test, pipeline.label_list)
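        # Columns of y_test follow the order of pipeline.label_list, so column i
        # in the loop below corresponds to the i-th outcome.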
# Compute statistics and plots
print_message("Computing model statistics", logger, script_name)
group_name = data_config["split_variables"]["group_name"]
extension = general_config["fig_parameters"]["extension"]
label_list = ["Unadjusted", "Recalibrated"]
for i, label in enumerate(pipeline.label_list):
# Plot for each outcome individually
metric_dict = {} # Store unadjusted values
metric_dict_cal = {} # Store recalibrated values
for key in pipeline.predictor_probabilities[label]:
# Compute metric values for both unadjusted and recalibrated
                y_true = y_test[X_test[group_name] == key]
metric_dict[key] = compute_score_metrics(
["auroc", "ap", "log_loss"],
y_true[:, i],
get_full_proba(pipeline.predictor_probabilities[label][key]),
)
metric_dict_cal[key] = compute_score_metrics(
["auroc", "ap", "log_loss"],
y_true[:, i],
get_full_proba(pipeline.calibrator_probabilities[label][key]),
)
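                # Append the recalibrated values after the unadjusted ones so
                # both variants travel in one structure, in label_list order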
                for k in metric_dict[key]:
metric_dict[key][k] += metric_dict_cal[key][k]
            # Create one CI dict covering both variants and plot the results
ci_dict = compute_all_CI(metric_dict)
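            # Hypothetical sketch: the intervals could also be logged, assuming
            # ci_dict mirrors metric_dict's {group: {metric: ...}} layout
            # (inferred from its construction above, not from compute_all_CI's
            # documented return type):
            # for group, intervals in ci_dict.items():
            #     print_message(f"{label} / {group}: {intervals}", logger, script_name)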
plot_metrics_CI(
ci_dict,
label_list=label_list,
dpi=300,
figsize=(5, 5),
extension=extension,
)
except Exception:
exception_handler(logger, log_dir, log_config, script_name)
        sys.exit(1)