##### Calculating inequality at the FUA-level #####
# 01_load_and_process_data script
# 28/10/2024
#####

# load the required packages
library(haven)
library(ineq)
library(DescTools)
library(tidyverse)
library(OasisR)
library(seg)
library(sf)
library(sp)

# Do not use scientific notation.
# `scipen` is an integer penalty; the previous value (9999999999) is larger
# than .Machine$integer.max (2147483647) and therefore cannot be stored as an
# integer. 999 is well inside the integer range and is already far more than
# the width of any number printed by this script.
options(scipen = 999)

#### Load and prepare data for 2022 ####

# Load the data
data <- read.csv("raw_data/database_2022.csv") # only if not already loaded

# Delete missing values.
# 99999999999 is the code used for a missing VEHW1000VERH; according to the
# original author's note, rows that are missing on this variable are the rows
# that are missing on all other variables as well.
# The explicit !is.na() guard matters: comparing NA with `!=` yields NA, and
# indexing the rows of a data frame with NA inserts a row consisting only of
# NAs instead of dropping the observation.
data <- data[!is.na(data$VEHW1000VERH) & data$VEHW1000VERH != 99999999999, ]

# Per-capita variables: every household amount is divided by INHAHL.
# NOTE(review): a household with INHAHL equal to 0 would yield Inf/NaN here;
# confirm that this cannot occur in the source data.

# Wealth per capita
data[["Wealth_per_capita"]] <- with(data, VEHW1000VERH / INHAHL)

# Income per capita
data[["Income_per_capita"]] <- with(data, INHBESTINKH / INHAHL)

# Real estate wealth per capita
data[["Real_estate_wealth_per_capita"]] <- with(data, VEHW1120ONRH / INHAHL)

# Movable wealth: the sum of the four remaining wealth components
data[["Movable_wealth"]] <- with(
  data,
  VEHW1110FINH + VEHW1130ONDH + VEHW1140ABEH + VEHW1150OVEH
)

# Movable wealth per capita
data[["Movable_wealth_per_capita"]] <- with(data, Movable_wealth / INHAHL)


#### Calculate FUA-level indicators of inequality ####

## Gini of wealth (household total, unadjusted)

# One Gini coefficient of VEHW1000VERH per Functional Urban Area (FUA); the
# value column is renamed so that it stays identifiable after merging.
Gini_wealth_unadjusted_FUA <-
  aggregate(VEHW1000VERH ~ FUA, data = data, FUN = ineq::Gini) %>%
  rename(Gini_wealth_unadjusted = VEHW1000VERH)

## Gini of income (household total, unadjusted)

# One Gini coefficient of INHBESTINKH per Functional Urban Area (FUA); the
# value column is renamed so that it stays identifiable after merging.
Gini_income_unadjusted_FUA <-
  aggregate(INHBESTINKH ~ FUA, data = data, FUN = ineq::Gini) %>%
  rename(Gini_income_unadjusted = INHBESTINKH)

## Gini of wealth per capita

# One Gini coefficient of Wealth_per_capita per Functional Urban Area (FUA);
# the value column is renamed so that it stays identifiable after merging.
Gini_wealth_per_capita_FUA <-
  aggregate(Wealth_per_capita ~ FUA, data = data, FUN = ineq::Gini) %>%
  rename(Gini_wealth_per_capita = Wealth_per_capita)

## Gini of income per capita

# One Gini coefficient of Income_per_capita per Functional Urban Area (FUA);
# the value column is renamed so that it stays identifiable after merging.
Gini_income_per_capita_FUA <-
  aggregate(Income_per_capita ~ FUA, data = data, FUN = ineq::Gini) %>%
  rename(Gini_income_per_capita = Income_per_capita)

## Gini of income adjusted for household size and composition

# One Gini coefficient of INHGESTINKH per Functional Urban Area (FUA); the
# value column is renamed so that it stays identifiable after merging.
Gini_income_adjusted_FUA <-
  aggregate(INHGESTINKH ~ FUA, data = data, FUN = ineq::Gini) %>%
  rename(Gini_income_adjusted = INHGESTINKH)

## Gini of real estate wealth per capita

# One Gini coefficient of Real_estate_wealth_per_capita per Functional Urban
# Area (FUA); the value column is renamed so that it stays identifiable after
# merging.
Gini_real_estate_wealth_per_capita_FUA <-
  aggregate(Real_estate_wealth_per_capita ~ FUA, data = data, FUN = ineq::Gini) %>%
  rename(Gini_real_estate_wealth_per_capita = Real_estate_wealth_per_capita)

## Gini of movable wealth per capita

# One Gini coefficient of Movable_wealth_per_capita per Functional Urban Area
# (FUA); the value column is renamed so that it stays identifiable after
# merging.
Gini_movable_wealth_per_capita_FUA <-
  aggregate(Movable_wealth_per_capita ~ FUA, data = data, FUN = ineq::Gini) %>%
  rename(Gini_movable_wealth_per_capita = Movable_wealth_per_capita)

#### Calculate indicators with per adult capita calculations ####

# Load the database about household characteristics and reduce it, in one
# pipeline, to what is needed further on.
households <- read_sav("G:/Bevolking/GBAHUISHOUDENSBUS/GBAHUISHOUDENS2022BUSV1.sav") %>%
  # ID is RINPERSOONS and RINPERSOON pasted together (separated by a space)
  mutate(ID = paste(RINPERSOONS, RINPERSOON)) %>%
  # more readable names for the start and end date of a household spell
  rename(start_date = DATUMAANVANGHH, end_date = DATUMEINDEHH) %>%
  # the dates are parsed as yyyymmdd values
  mutate(
    start_date = as.Date(as.character(start_date), format = "%Y%m%d"),
    end_date = as.Date(as.character(end_date), format = "%Y%m%d")
  ) %>%
  # Drop households that ceased to exist on or before the 1st of January 2011.
  # filter() also drops rows whose end_date could not be parsed (NA).
  # NOTE(review): confirm that the file contains no placeholder end dates
  # that fail to parse, otherwise those households are lost here.
  filter(end_date > as.Date("2011-01-01")) %>%
  # Keep only the required columns: the database is huge and computational
  # power is limited, so superfluous information is discarded.
  select(ID, start_date, end_date, AANTALPERSHH, AANTALKINDHH)

# Extract the observations from the households database that correspond to
# the year 2022, taking the 1st of January as reference. The dates were
# already converted to Date above, so no further parsing is needed.
# NOTE(review): a household whose end_date is exactly 2022-01-01 is excluded
# by the strict comparison; confirm whether end dates are inclusive.
households_2022 <- households %>%
  filter(
    start_date < as.Date("2022-01-02"),
    end_date > as.Date("2022-01-01")
  )

# Merge both databases and derive the number of adults in one step.
# all.x = TRUE / all.y = FALSE keeps every row of `data`; rows without a
# matching household record get NA in the household columns.
# NOTE(review): if an ID occurs more than once in households_2022, merge()
# duplicates the corresponding rows of `data` - confirm that IDs are unique.
data <- merge(data, households_2022, by = "ID", all.x = TRUE, all.y = FALSE) %>%
  # adults = all household members minus the children
  mutate(number_of_adults = AANTALPERSHH - AANTALKINDHH)

## Gini of wealth per adult capita

# Wealth divided by the number of adults in the household.
# NOTE(review): number_of_adults equal to 0 would give Inf/NaN; confirm that
# this cannot occur.
data[["Wealth_per_adult_capita"]] <- with(data, VEHW1000VERH / number_of_adults)

# One Gini coefficient per Functional Urban Area (FUA); the value column is
# renamed so that it stays identifiable after merging.
Gini_wealth_per_adult_capita_FUA <-
  aggregate(Wealth_per_adult_capita ~ FUA, data = data, FUN = ineq::Gini) %>%
  rename(Gini_wealth_per_adult_capita = Wealth_per_adult_capita)

## Gini of income per adult capita

# Income divided by the number of adults in the household.
data[["Income_per_adult_capita"]] <- with(data, INHBESTINKH / number_of_adults)

# One Gini coefficient per Functional Urban Area (FUA); the value column is
# renamed so that it stays identifiable after merging.
Gini_income_per_adult_capita_FUA <-
  aggregate(Income_per_adult_capita ~ FUA, data = data, FUN = ineq::Gini) %>%
  rename(Gini_income_per_adult_capita = Income_per_adult_capita)

## Gini of real estate wealth per adult capita

# Real estate wealth divided by the number of adults in the household.
data[["Real_estate_wealth_per_adult_capita"]] <- with(
  data,
  VEHW1120ONRH / number_of_adults
)

# One Gini coefficient per Functional Urban Area (FUA); the value column is
# renamed so that it stays identifiable after merging.
Gini_real_estate_wealth_per_adult_capita_FUA <-
  aggregate(Real_estate_wealth_per_adult_capita ~ FUA, data = data, FUN = ineq::Gini) %>%
  rename(Gini_real_estate_wealth_per_adult_capita = Real_estate_wealth_per_adult_capita)

## Gini of movable wealth per adult capita

# Movable wealth: the sum of the four non-real-estate wealth components
# (recomputed here with the same formula used for the per-capita indicator).
data[["Movable_wealth"]] <- with(
  data,
  VEHW1110FINH + VEHW1130ONDH + VEHW1140ABEH + VEHW1150OVEH
)

# Movable wealth divided by the number of adults in the household.
data[["Movable_wealth_per_adult_capita"]] <- with(
  data,
  Movable_wealth / number_of_adults
)

# One Gini coefficient per Functional Urban Area (FUA); the value column is
# renamed so that it stays identifiable after merging.
Gini_movable_wealth_per_adult_capita_FUA <-
  aggregate(Movable_wealth_per_adult_capita ~ FUA, data = data, FUN = ineq::Gini) %>%
  rename(Gini_movable_wealth_per_adult_capita = Movable_wealth_per_adult_capita)


#### Aggregate FUA-level indicators ####

# Fold all indicator tables into one table, joining them one after another on
# "FUA". merge() keeps only the FUAs that are present in both of its inputs,
# so the result contains the FUAs for which every indicator is available.
FUA_level_data <- Reduce(
  function(left, right) merge(left, right, by = "FUA"),
  list(
    Gini_wealth_unadjusted_FUA,
    Gini_income_unadjusted_FUA,
    Gini_wealth_per_capita_FUA,
    Gini_income_per_capita_FUA,
    Gini_income_adjusted_FUA,
    Gini_real_estate_wealth_per_capita_FUA,
    Gini_movable_wealth_per_capita_FUA,
    Gini_wealth_per_adult_capita_FUA,
    Gini_income_per_adult_capita_FUA,
    Gini_real_estate_wealth_per_adult_capita_FUA,
    Gini_movable_wealth_per_adult_capita_FUA
  )
)

#### Save ####

# Save as a csv file
write.csv(FUA_level_data, file = "inequality_data_FUA_level.csv", row.names = FALSE)

# Save as a xlsx file.
# The workbook gets its own .xlsx path: writing it to the .csv path would
# overwrite the csv file created just above with xlsx content.
# write_xlsx() is called through its namespace because this script does not
# attach the writexl package with library().
writexl::write_xlsx(FUA_level_data, path = "inequality_data_FUA_level.xlsx")

#### Analyze correlations ####

# Correlation, across FUAs, between each per-capita Gini and its per-adult
# counterpart.
with(FUA_level_data, cor(Gini_wealth_per_capita, Gini_wealth_per_adult_capita))
with(FUA_level_data, cor(Gini_income_per_capita, Gini_income_per_adult_capita))
with(
  FUA_level_data,
  cor(Gini_real_estate_wealth_per_capita, Gini_real_estate_wealth_per_adult_capita)
)
with(
  FUA_level_data,
  cor(Gini_movable_wealth_per_capita, Gini_movable_wealth_per_adult_capita)
)

