Aim

Simulate predictions from posterior distributions of the choosen model.

Setup

Clear global environment

rm(list = ls()); gc()
getwd()

Load libraries

library(tidyverse)
library(INLA)

Setup file paths:

data_path: path to folder containing the inla data output_path: path to folder where to save any output from this script

additional functions

# function to autofit flextable to desired word page width in cm
FitFlextableToPage <- function(ft, pgwidth = 17){ # desired pgwidth in cm
  ft_out <- ft %>% autofit()
  ft_out <- width(ft_out, width = dim(ft_out)$widths * pgwidth * 0.3937 /(flextable_dim(ft_out)$widths))
  return(ft_out)
}

# function to calculate LCPO
# calculate LCPO
slcpo <- function(m, na.rm = TRUE) {
  - sum(log(m$cpo$cpo), na.rm = na.rm)
}

Load all inla models & compare

Load models

# all INLA model files & model setup files
IM_models <- tibble(IM_file = list.files(file.path(model_output_path))) %>% 
  mutate(model_setup_file = paste0("model_setup_", IM_file)) %>%
  filter(str_starts(IM_file, "IM\\w+\\d+_no_priors")) %>% 
  mutate(model_name = case_when(str_detect(IM_file, "pred") == TRUE ~ paste0(str_extract(IM_file, "IM_[A-Z]+[0-9]+"), "_pred"),
                                TRUE ~ str_extract(IM_file, "IM_[A-Z]+[0-9]+"))) #str_subset("IM\\w+\\d+_no_priors")
IM_models

model_list <- list()
model_setup_list <- list()
for (model_file in IM_models$IM_file){
  model_name <- IM_models %>% filter(IM_file == model_file) %>% pull(model_name)
  model_list[[model_name]] <- readRDS(file.path(model_output_path, model_file))
  model_setup_list[[model_name]] <- readRDS(file.path(model_output_path, filter(IM_models, IM_file == model_file) %>% pull(model_setup_file) ))
}

Simulation of predicted numbers

Function for running simulations of prediction INLA models

# this function applies inla.posterior.sample and processes the results to return a simulation dataframe in wide and long format
simulate_predictions_of_inla_model <- function(model_name,
                                               IM_path, 
                                               IM_model_setup_path,
                                               results_path,
                                               seed_num = NULL,
                                               nSim = 1000
){
  # function checks
  ## check if any required arguments are missing
  checkmate::anyMissing(c(model_name, IM_path, IM_model_setup_path, results_path))
  
  # read INLA model and model setup
  message("reading INLA model and model setup files")
  IM <- read_rds(IM_path)
  IM_model_setup_data <- read_rds(IM_model_setup_path)$data
  
  # set pred_results_path results_path + model_name
  pred_results_path <- file.path(results_path, model_name)
  if(!dir.exists(pred_results_path)){dir.create(pred_results_path, recursive = TRUE)}
  
  # apply inla.posterior.sample function
  ## set seed if provided as seed_num
  if(!is.null(seed_num)){
    set.seed(seed_num)
    message(paste0("Applying inla.posterior.sample function with ", nSim, " simulations and with seed set to ", seed_num, "."))
  }
  else{
    message(paste0("Applying inla.posterior.sample function with ", nSim, " simulations and with random seed."))
  }

  Sim <- lapply(inla.posterior.sample(n = nSim, IM), function(x){x[["latent"]]})
  
  rm(IM)
  
  # process simulation results
  message("processing simulation results")

  # index of APpredictor
  APredictor_rownum <- grep(paste0("^", "APredictor"), rownames(Sim[[1]]))
  
  #make index for which records are Apredictor(predictors for rows in data)
  pred_sim  <- t(do.call(rbind, lapply(Sim, function(x){as.numeric(x)[APredictor_rownum]}))) #select data using index APredictor_rownum, 
                                                                                              #make into numeric vector and then into matrix
  rm(Sim)
    
  pred_sim <- exp(pred_sim[is.na(IM_model_setup_data$NoPerHaul),]) #select from matrix only rows where input data was NA,
                                                             #these do not yet have the link function and that they _do_ have the offset included,
                                                             #so just exponentiate  
 
  colnames(pred_sim) <-  paste0("sim", 1:nSim) # select colnames to tally
  
  pred_sim <- IM_model_setup_data  %>%
    filter(is.na(NoPerHaul)) %>%
    add_column(as.data.frame(pred_sim)) # add simulation results to data
  
  # add Exploit_Maturity for grouping
  pred_sim <- within(pred_sim,{ Exploit_Maturity = NA 
                    Exploit_Maturity[ Unexploited_Immature == 1] <- "Unexploited_Immature"
                    Exploit_Maturity[ Exploited_Immature == 1] <- "Exploited_Immature"
                     Exploit_Maturity[ Exploited_Mature == 1]  <-  "Exploited_Mature"
  }) %>% relocate(Exploit_Maturity, .before = Unexploited_Immature)
  
  # save pred_sim
  message("saving simulation results")
  write_rds(pred_sim, file = file.path(pred_results_path, paste0(model_name, "_sim", nSim, ".rds"))) # ~ 17GB
}

Simualte results for IM_ST10_pred:

model_name = "IM_ST10_pred"

(IM_ST10_no_prior_pred_path <-  file.path(model_output_path, list.files(model_output_path)[list.files(model_output_path) %>%
  str_detect("IM_ST10_no_priors_pred")][!list.files(model_output_path)[list.files(model_output_path)%>%  
  str_detect("IM_ST10_no_priors_pred")] %>% str_detect("model_setup")]))

(model_setup_path <- file.path(model_output_path, list.files(model_output_path)[list.files(model_output_path)%>%
  str_detect("IM_ST10_no_priors_pred")][list.files(model_output_path)[list.files(model_output_path)%>% 
  str_detect("IM_ST10_no_priors_pred")] %>% str_detect("model_setup")]))

# simulate IM_ST10_pred
simulate_predictions_of_inla_model(model_name = "IM_ST10_pred",
                                   IM_path             = IM_ST10_no_prior_pred_path,
                                   IM_model_setup_path = model_setup_path,
                                   results_path = results_path,
                                   nSim = 1000,
                                   seed_num = 2156)