##### Calculating wealth per age #####
# 01_load_and_process_data script
# 05/08/2025
#####

# load the required packages
library(haven)
library(ineq)
library(DescTools)
library(tidyverse)
library(OasisR)
library(seg)
library(sf)
library(sp)

# Print numbers in fixed rather than scientific notation
options(scipen = 9999999)

#### Preparing the data #### ---------------------------------------------------

# Reference date used to pick the household spell that applies to 2022
reference_date <- as.Date("2022-01-01")

# Load the household characteristics. The file is very large and computational
# power is limited, so only the columns used further down are read from disk
# (col_select) instead of loading everything and discarding most of it later.
households <- read_sav(
  "G:/Bevolking/GBAHUISHOUDENSBUS/GBAHUISHOUDENS2022BUSV1.sav",
  col_select = c(
    RINPERSOONS, RINPERSOON,
    DATUMAANVANGHH, DATUMEINDEHH,
    AANTALPERSHH, AANTALKINDHH
  )
)

households <- households %>%
  # Make the date columns more readable
  rename(start_date = DATUMAANVANGHH, end_date = DATUMEINDEHH) %>%
  # The dates are stored as yyyymmdd values; parse them into Date objects
  mutate(
    start_date = as.Date(as.character(start_date), format = "%Y%m%d"),
    end_date = as.Date(as.character(end_date), format = "%Y%m%d")
  ) %>%
  # Keep the spells that started on or before the reference date and end
  # after it.
  # NOTE(review): a spell whose end date equals the reference date is excluded
  # by the strict comparison - confirm whether DATUMEINDEHH is inclusive.
  filter(start_date <= reference_date, end_date > reference_date) %>%
  # Build the person key "ID" from RINPERSOONS and RINPERSOON (pasted with a
  # space), which is the key used to merge with the other databases
  mutate(ID = paste(RINPERSOONS, RINPERSOON)) %>%
  # Keep only the columns needed downstream
  select(ID, start_date, end_date, AANTALPERSHH, AANTALKINDHH)

# Read the 2022 wealth database (skip this line when it is already in memory)
wealth2022 <- read.csv("raw_data/database_2022.csv")

# Attach the household information to every wealth record through the ID key;
# wealth records without a matching household are kept (all.x = TRUE)
wealth2022 <- merge(
  x = wealth2022,
  y = households,
  by = "ID",
  all.x = TRUE,
  all.y = FALSE
)

# Number of adults: AANTALPERSHH minus AANTALKINDHH
wealth2022$number_of_adults <- wealth2022$AANTALPERSHH - wealth2022$AANTALKINDHH

# Read the personal information and, in the same step, build the "ID" key by
# pasting RINPERSOONS and RINPERSOON together (separated by a space)
individuals_data_2022 <-
  read_sav("G:/Bevolking/GBAPERSOONTAB/2022/GBAPERSOON2022TABV2.sav") %>%
  mutate(ID = paste(RINPERSOONS, RINPERSOON))

# Attach the personal information to every wealth record through the ID key;
# wealth records without a match are kept (all.x = TRUE)
wealth2022 <- merge(
  x = wealth2022,
  y = individuals_data_2022,
  by = "ID",
  all.x = TRUE,
  all.y = FALSE
)

# Delete missing values of wealth: both real NAs and the placeholder value
# 99999999999. The explicit is.na() check matters: subsetting with a bare
# `!=` comparison yields NA for NA wealth, and an NA index produces a row
# filled entirely with NAs instead of dropping the record.
has_wealth <- !is.na(wealth2022$VEHW1000VERH) &
  wealth2022$VEHW1000VERH != 99999999999
wealth2022 <- wealth2022[has_wealth, ]

# Calculate wealth per capita
wealth2022$Wealth_per_capita <- wealth2022$VEHW1000VERH / wealth2022$INHAHL

# Calculate wealth per capita including only adults. A household with no
# adults would give Inf/NaN, which na.rm = TRUE in the summaries does not
# remove, so such denominators are turned into NA first.
adults_in_household <- wealth2022$number_of_adults
adults_in_household[!is.na(adults_in_household) & adults_in_household <= 0] <- NA
wealth2022$Wealth_per_adult_capita <-
  wealth2022$VEHW1000VERH / adults_in_household

#### Calculate wealth per year of birth ####

# Summarise wealth by year of birth.
#
# data: data frame holding the columns GBAGEBOORTEJAAR, VEHW1000VERH,
#   Wealth_per_capita and Wealth_per_adult_capita.
# min_observations: years of birth with fewer records than this are dropped.
# reference_year: year from which the year of birth is subtracted to get Age.
#
# Returns one row per year of birth with the number of observations, the
# mean / median / sd / variance of total, per capita and per adult capita
# wealth, and the Age column.
summarise_wealth_by_birth_year <- function(data,
                                           min_observations = 20,
                                           reference_year = 2022) {
  data %>%
    group_by(GBAGEBOORTEJAAR) %>%
    summarise(
      number_of_observations = n(),
      mean_wealth = mean(VEHW1000VERH),
      median_wealth = median(VEHW1000VERH),
      sd_wealth = sd(VEHW1000VERH),
      variance_wealth = var(VEHW1000VERH),
      mean_per_capita_wealth = mean(Wealth_per_capita),
      median_per_capita_wealth = median(Wealth_per_capita),
      sd_per_capita_wealth = sd(Wealth_per_capita),
      variance_per_capita_wealth = var(Wealth_per_capita),
      # The per adult figures are missing for some records, hence na.rm = TRUE
      mean_per_adult_capita_wealth = mean(Wealth_per_adult_capita, na.rm = TRUE),
      median_per_adult_capita_wealth = median(Wealth_per_adult_capita, na.rm = TRUE),
      sd_per_adult_capita_wealth = sd(Wealth_per_adult_capita, na.rm = TRUE),
      variance_per_adult_capita_wealth = var(Wealth_per_adult_capita, na.rm = TRUE)
    ) %>%
    # Exclude years with too few observations
    filter(number_of_observations >= min_observations) %>%
    # Calculate the age from the year of birth
    mutate(Age = reference_year - as.numeric(GBAGEBOORTEJAAR))
}

# Average and median wealth per year of birth, whole population
wealth_per_year_of_birth <- summarise_wealth_by_birth_year(wealth2022)

#### Calculate wealth per year of birth of those born abroad ####

# Add a variable of whether the person was born abroad or not: country of
# birth code "6030" is coded 0 (not abroad), every other code 1
wealth2022 <- wealth2022 %>%
  mutate(Born_abroad = if_else(GBAGEBOORTELAND == "6030", 0, 1))

# Filter
born_abroad <- wealth2022 %>%
  filter(Born_abroad == 1)

# Group by year
wealth_per_year_of_birth_born_abroad <-
  summarise_wealth_by_birth_year(born_abroad)

#### Calculate wealth per year of birth of those born in the Netherlands ####

# Filter
born_in_the_Netherlands <- wealth2022 %>%
  filter(Born_abroad == 0)

# Group by year
wealth_per_year_of_birth_born_in_the_Netherlands <-
  summarise_wealth_by_birth_year(born_in_the_Netherlands)

#### Save ####

# Tables to export; each name is the file name (without extension) used on disk
summary_tables <- list(
  wealth_per_year_of_birth = wealth_per_year_of_birth,
  wealth_per_year_of_birth_born_abroad = wealth_per_year_of_birth_born_abroad,
  wealth_per_year_of_birth_born_in_the_Netherlands =
    wealth_per_year_of_birth_born_in_the_Netherlands
)

# Save every table both as a csv and as a xlsx file. write_xlsx() is called
# through its namespace because the writexl package is not attached by this
# script.
for (table_name in names(summary_tables)) {
  write.csv(
    summary_tables[[table_name]],
    file = paste0(table_name, ".csv"),
    row.names = FALSE
  )
  writexl::write_xlsx(
    summary_tables[[table_name]],
    path = paste0(table_name, ".xlsx")
  )
}

#### Visualize as a check ####

# Scatter plot of one summary statistic against Age for the three tables:
# everyone (yellow), born abroad (red) and born in the Netherlands (blue).
#
# y_column: name (string) of the column to put on the y axis; it must exist in
#   all three wealth_per_year_of_birth* tables.
#
# Returns the ggplot object.
plot_wealth_by_origin <- function(y_column) {
  point_mapping <- aes(x = Age, y = .data[[y_column]])

  ggplot() +
    geom_point(
      data = wealth_per_year_of_birth,
      mapping = point_mapping,
      color = "yellow"
    ) +
    geom_point(
      data = wealth_per_year_of_birth_born_abroad,
      mapping = point_mapping,
      color = "red"
    ) +
    geom_point(
      data = wealth_per_year_of_birth_born_in_the_Netherlands,
      mapping = point_mapping,
      color = "blue"
    ) +
    # Label the y axis with the plain column name
    labs(x = "Age", y = y_column)
}

# Mean, then median, of wealth, wealth per capita and wealth per adult capita
statistics_to_check <- c(
  "mean_wealth",
  "mean_per_capita_wealth",
  "mean_per_adult_capita_wealth",
  "median_wealth",
  "median_per_capita_wealth",
  "median_per_adult_capita_wealth"
)

# print() is explicit so the plots are also drawn when the script is run
# through source(), where top-level values are not printed automatically
for (statistic in statistics_to_check) {
  print(plot_wealth_by_origin(statistic))
}
  
#### Study how the distribution of wealth varies through age ####

# Divide an amount by the household size and replace negative results by zero
per_capita_floored <- function(amount, household_size) {
  pmax(amount / household_size, 0)
}

wealth2022 <- wealth2022 %>%
  mutate(
    # Wealth per capita in each household (not floored at zero)
    Wealth_per_capita = VEHW1000VERH / INHAHL,

    # Real estate wealth per capita
    Real_estate_wealth_per_capita = per_capita_floored(VEHW1120ONRH, INHAHL),

    # Movable wealth: sum of four components, then per capita
    Movable_wealth = VEHW1110FINH + VEHW1130ONDH + VEHW1140ABEH + VEHW1150OVEH,
    Movable_wealth_per_capita = per_capita_floored(Movable_wealth, INHAHL),

    # Deposits and savings wealth
    Deposits_and_savings_per_capita = per_capita_floored(VEHW1111BANH, INHAHL),

    # Entrepreneurial wealth
    Entrepenurial_wealth_per_capita = per_capita_floored(VEHW1130ONDH, INHAHL),

    # Bonds and shares wealth: sum of two components, then per capita
    Bonds_and_shares_wealth = VEHW1112EFFH + VEHW1140ABEH,
    Bonds_and_shares_wealth_per_capita =
      per_capita_floored(Bonds_and_shares_wealth, INHAHL),

    # Other wealth (e.g. cash)
    Other_wealth_per_capita = per_capita_floored(VEHW1150OVEH, INHAHL),

    # Significant shares wealth
    Significant_shares_wealth_per_capita =
      per_capita_floored(VEHW1140ABEH, INHAHL),

    # Bonds wealth
    Bonds_wealth_per_capita = per_capita_floored(VEHW1112EFFH, INHAHL),

    # Only positive wealth: sum of the floored per capita components
    All_wealth_per_capita = Other_wealth_per_capita +
      Bonds_and_shares_wealth_per_capita +
      Entrepenurial_wealth_per_capita +
      Deposits_and_savings_per_capita +
      Real_estate_wealth_per_capita,

    # Share of real estate and of financial wealth in the overall wealth
    Share_real_estate = Real_estate_wealth_per_capita / All_wealth_per_capita,
    Share_financial_wealth = Movable_wealth_per_capita / All_wealth_per_capita,

    # Shares of the subtypes of financial wealth
    Share_deposits_and_savings_wealth =
      Deposits_and_savings_per_capita / All_wealth_per_capita,
    Share_entrepenurial_wealth =
      Entrepenurial_wealth_per_capita / All_wealth_per_capita,
    Share_bonds_and_shares_wealth =
      Bonds_and_shares_wealth_per_capita / All_wealth_per_capita,
    Share_other_wealth = Other_wealth_per_capita / All_wealth_per_capita,
    Share_only_significant_shares =
      Significant_shares_wealth_per_capita / All_wealth_per_capita,
    Share_only_bonds = Bonds_wealth_per_capita / All_wealth_per_capita
  )

# Calculate the average share of every type of wealth for each year of birth.
# Shares are NaN/NA where the overall wealth is zero or missing, hence
# na.rm = TRUE.
share_type_wealth_age <- wealth2022 %>%
  group_by(GBAGEBOORTEJAAR) %>%
  summarize(
    number_of_observations = n(),
    Real_estate_wealth_share = mean(Share_real_estate, na.rm = TRUE),
    Financial_wealth_share = mean(Share_financial_wealth, na.rm = TRUE),
    Deposits_and_savings_wealth_share =
      mean(Share_deposits_and_savings_wealth, na.rm = TRUE),
    Entrepenurial_wealth_share = mean(Share_entrepenurial_wealth, na.rm = TRUE),
    Share_and_bonds_wealth_share =
      mean(Share_bonds_and_shares_wealth, na.rm = TRUE),
    Other_wealth_share = mean(Share_other_wealth, na.rm = TRUE),
    Only_significant_shares_share =
      mean(Share_only_significant_shares, na.rm = TRUE),
    Only_bonds_and_non_significant_shares_share =
      mean(Share_only_bonds, na.rm = TRUE)
  ) %>%
  # Calculate the age from the year of birth
  mutate(Age = 2022 - as.numeric(GBAGEBOORTEJAAR)) %>%
  # Exclude years with less than 20 observations
  filter(number_of_observations > 19)

# Save as a csv file
write.csv(share_type_wealth_age, file = "share_type_wealth_age.csv", row.names = FALSE)

# Save as a xlsx file. write_xlsx() is called through its namespace because
# the writexl package is not attached by this script.
writexl::write_xlsx(share_type_wealth_age, path = "share_type_wealth_age.xlsx")

# Visualize the average wealth shares against age: one colour per type of
# wealth, drawn as points joined by a line. Age is mapped once for the whole
# plot; each layer only supplies its own y column.
ggplot(share_type_wealth_age, aes(x = Age)) +
  # Real estate
  geom_point(aes(y = Real_estate_wealth_share), colour = "yellow") +
  geom_line(aes(y = Real_estate_wealth_share), colour = "yellow") +
  # Deposits and savings
  geom_point(aes(y = Deposits_and_savings_wealth_share), colour = "lightblue") +
  geom_line(aes(y = Deposits_and_savings_wealth_share), colour = "lightblue") +
  # Entrepreneurial wealth
  geom_point(aes(y = Entrepenurial_wealth_share), colour = "blue") +
  geom_line(aes(y = Entrepenurial_wealth_share), colour = "blue") +
  # Bonds and shares
  geom_point(aes(y = Share_and_bonds_wealth_share), colour = "darkblue") +
  geom_line(aes(y = Share_and_bonds_wealth_share), colour = "darkblue") +
  # Other wealth
  geom_point(aes(y = Other_wealth_share), colour = "grey") +
  geom_line(aes(y = Other_wealth_share), colour = "grey") +
  # Significant shares only
  geom_point(aes(y = Only_significant_shares_share), colour = "orange") +
  geom_line(aes(y = Only_significant_shares_share), colour = "orange")
