##################################
# Gateway Factor: data treatment #
##################################

# The workspace is intentionally NOT cleared with rm(list = ls()): every
# object used by the steps below is assigned before it is read, so a clean
# environment is not required, and wiping it would silently delete unrelated
# objects whenever this script is source()d from an interactive session.
# Run the script in a fresh R session if a clean start is wanted.
library(tidyverse)

# read country data (names for Great Britain in sources changed to 'UK')
## class A logistics building footprint per capita, by country
## (figures from CTP & CBRE, entered by hand; one row per country)
ctp <- tribble(
  ~country,       ~ctp_percap,
  "UnitedStates", 2.85,
  "Netherlands",  2.20,
  "Germany",      1.13,
  "Czechia",      1.00,
  "France",       0.69,
  "Poland",       0.69,
  "UK",           0.68,
  "Slovakia",     0.64,
  "Spain",        0.43,
  "Italy",        0.42,
  "Hungary",      0.42,
  "Romania",      0.31,
  "Bulgaria",     0.25,
  "Serbia",       0.21
)
## read Eurostat & worldbank country data
### population
# Population per country read from file, with one UK row appended by hand.
# NOTE(review): presumably the UK is added because EU_pop2022.csv does not
# contain it -- confirm against the source file.
# bind_rows() matches columns by name and stops on incompatible column types
# instead of silently coercing them the way rbind() does.
pop <- read_csv2("gateway_factor/data/EU_pop2022.csv") |>
  bind_rows(tibble(country = "UK", Pop2022 = 67508936))
### trade volume imports and exports
# Trade figures per country read from file, with one UK row appended by hand.
# NOTE(review): the 1.16 multiplier on the UK figures looks like a currency
# conversion into the unit used in trade.csv -- confirm factor and units.
trade <- read_csv2("gateway_factor/data/trade.csv") |>
  bind_rows(
    tibble(
      country = "UK",
      Exports = 854000 * 1.16,
      Imports = 905000 * 1.16,
      # tibble() evaluates columns in order, so the two columns defined
      # just above can be referenced here
      Balance = Exports - Imports
    )
  )
### value added share of transport/logistics/trade/retail sectors and manufacturing industry sectors
# Reshape the long value-added table (one row per country x NACE code) into
# one row per country with one column per NACE code, then derive the shares
# of the `G-I` and `B-E` aggregates in TOTAL.
# VAL is coerced to numeric once, before pivoting, so that every NACE column
# (TOTAL in particular, which is divided by population further down) is
# stored as numeric rather than being converted only inside the share
# formulas.
value <- read_csv2("gateway_factor/data/EU_valueadded_NACE10.csv") |>
  select(GEO, NACE_R2, VAL) |>
  mutate(VAL = as.numeric(VAL)) |>
  pivot_wider(names_from = NACE_R2, values_from = VAL) |>
  rename(country = GEO) |>
  mutate(
    log_share = `G-I` / TOTAL,
    ind_share = `B-E` / TOTAL
  )
### container throughput in twenty-foot equivalent units (TEU)
# Keep the country key and the 2019 figure only; a missing throughput value
# in the file is stored as 0.
teu <- read_csv2("gateway_factor/data/TEU_throughput_world.csv") |>
  select(country, TEU_2019) |>
  mutate(TEU_2019 = replace(TEU_2019, is.na(TEU_2019), 0))
### e-commerce penetration rate
# Only the country key and the 2022 value are kept.
ecom <- read_csv2("gateway_factor/data/EU_ecommerce.csv") |>
  select(country, Ecom_2022)

### household expenditure per capita
# Read as-is; all columns of the file are carried into the join below.
expenditure <- read_csv2(file = "gateway_factor/data/EU_expenditure.csv")

# join data and write model data file
# All tables are left-joined, one after another, onto the population table,
# so every row of `pop` is kept and countries that appear only in the other
# tables are dropped. No `by` is given: each join uses whatever column names
# the two tables have in common.
# The per-capita indicators divide each total by Pop2022.
modeldata <- list(pop, value, teu, ecom, ctp, trade, expenditure) |>
  reduce(left_join) |>
  mutate(
    val_cap = TOTAL / Pop2022,
    teu_cap = TEU_2019 / Pop2022,
    bal_cap = Balance / Pop2022,
    imp_cap = Imports / Pop2022,
    exp_cap = Exports / Pop2022
  )

write_csv(modeldata, file = "gateway_factor/processed/modeldata.csv")