Simulate predictions from posterior distributions of the choosen model.
rm(list = ls()); gc()
getwd()
library(tidyverse)
library(INLA)
data_path: path to folder containing the inla data output_path: path to folder where to save any output from this script
# function to autofit flextable to desired word page width in cm
FitFlextableToPage <- function(ft, pgwidth = 17){ # desired pgwidth in cm
ft_out <- ft %>% autofit()
ft_out <- width(ft_out, width = dim(ft_out)$widths * pgwidth * 0.3937 /(flextable_dim(ft_out)$widths))
return(ft_out)
}
# function to calculate LCPO
# calculate LCPO
slcpo <- function(m, na.rm = TRUE) {
- sum(log(m$cpo$cpo), na.rm = na.rm)
}
# all INLA model files & model setup files
IM_models <- tibble(IM_file = list.files(file.path(model_output_path))) %>%
mutate(model_setup_file = paste0("model_setup_", IM_file)) %>%
filter(str_starts(IM_file, "IM\\w+\\d+_no_priors")) %>%
mutate(model_name = case_when(str_detect(IM_file, "pred") == TRUE ~ paste0(str_extract(IM_file, "IM_[A-Z]+[0-9]+"), "_pred"),
TRUE ~ str_extract(IM_file, "IM_[A-Z]+[0-9]+"))) #str_subset("IM\\w+\\d+_no_priors")
IM_models
model_list <- list()
model_setup_list <- list()
for (model_file in IM_models$IM_file){
model_name <- IM_models %>% filter(IM_file == model_file) %>% pull(model_name)
model_list[[model_name]] <- readRDS(file.path(model_output_path, model_file))
model_setup_list[[model_name]] <- readRDS(file.path(model_output_path, filter(IM_models, IM_file == model_file) %>% pull(model_setup_file) ))
}
# this function applies inla.posterior.sample and processes the results to return a simulation dataframe in wide and long format
simulate_predictions_of_inla_model <- function(model_name,
IM_path,
IM_model_setup_path,
results_path,
seed_num = NULL,
nSim = 1000
){
# function checks
## check if any required arguments are missing
checkmate::anyMissing(c(model_name, IM_path, IM_model_setup_path, results_path))
# read INLA model and model setup
message("reading INLA model and model setup files")
IM <- read_rds(IM_path)
IM_model_setup_data <- read_rds(IM_model_setup_path)$data
# set pred_results_path results_path + model_name
pred_results_path <- file.path(results_path, model_name)
if(!dir.exists(pred_results_path)){dir.create(pred_results_path, recursive = TRUE)}
# apply inla.posterior.sample function
## set seed if provided as seed_num
if(!is.null(seed_num)){
set.seed(seed_num)
message(paste0("Applying inla.posterior.sample function with ", nSim, " simulations and with seed set to ", seed_num, "."))
}
else{
message(paste0("Applying inla.posterior.sample function with ", nSim, " simulations and with random seed."))
}
Sim <- lapply(inla.posterior.sample(n = nSim, IM), function(x){x[["latent"]]})
rm(IM)
# process simulation results
message("processing simulation results")
# index of APpredictor
APredictor_rownum <- grep(paste0("^", "APredictor"), rownames(Sim[[1]]))
#make index for which records are Apredictor(predictors for rows in data)
pred_sim <- t(do.call(rbind, lapply(Sim, function(x){as.numeric(x)[APredictor_rownum]}))) #select data using index APredictor_rownum,
#make into numeric vector and then into matrix
rm(Sim)
pred_sim <- exp(pred_sim[is.na(IM_model_setup_data$NoPerHaul),]) #select from matrix only rows where input data was NA,
#these do not yet have the link function and that they _do_ have the offset included,
#so just exponentiate
colnames(pred_sim) <- paste0("sim", 1:nSim) # select colnames to tally
pred_sim <- IM_model_setup_data %>%
filter(is.na(NoPerHaul)) %>%
add_column(as.data.frame(pred_sim)) # add simulation results to data
# add Exploit_Maturity for grouping
pred_sim <- within(pred_sim,{ Exploit_Maturity = NA
Exploit_Maturity[ Unexploited_Immature == 1] <- "Unexploited_Immature"
Exploit_Maturity[ Exploited_Immature == 1] <- "Exploited_Immature"
Exploit_Maturity[ Exploited_Mature == 1] <- "Exploited_Mature"
}) %>% relocate(Exploit_Maturity, .before = Unexploited_Immature)
# save pred_sim
message("saving simulation results")
write_rds(pred_sim, file = file.path(pred_results_path, paste0(model_name, "_sim", nSim, ".rds"))) # ~ 17GB
}
model_name = "IM_ST10_pred"
(IM_ST10_no_prior_pred_path <- file.path(model_output_path, list.files(model_output_path)[list.files(model_output_path) %>%
str_detect("IM_ST10_no_priors_pred")][!list.files(model_output_path)[list.files(model_output_path)%>%
str_detect("IM_ST10_no_priors_pred")] %>% str_detect("model_setup")]))
(model_setup_path <- file.path(model_output_path, list.files(model_output_path)[list.files(model_output_path)%>%
str_detect("IM_ST10_no_priors_pred")][list.files(model_output_path)[list.files(model_output_path)%>%
str_detect("IM_ST10_no_priors_pred")] %>% str_detect("model_setup")]))
# simulate IM_ST10_pred
simulate_predictions_of_inla_model(model_name = "IM_ST10_pred",
IM_path = IM_ST10_no_prior_pred_path,
IM_model_setup_path = model_setup_path,
results_path = results_path,
nSim = 1000,
seed_num = 2156)