##### Calculating percentile thresholds #####
# 01_load_and_process_data script
# 30/10/2024
#####

# load the required packages
library(haven)
library(ineq)
library(DescTools)
library(tidyverse)
library(OasisR)
library(seg)
library(sf)
library(sp)

# Do not use scientific notation. A penalty of 999 already makes fixed notation
# win for any number this script prints.
# NOTE(review): recent R versions appear to clamp very large scipen values with
# a warning -- confirm against ?options.
options(scipen = 999)

#### Preparing the data #### ---------------------------------------------------
# Load the household-level data (only needed if wealth2022 is not already in
# memory)
wealth2022 <- read.csv("raw_data/database_2022.csv")

# Delete missing values of wealth. Missing wealth is coded as 99999999999.
# The is.na() guard matters: a bare `x != code` mask is NA where x is NA, and
# indexing a data frame with an NA logical inserts a row of NAs instead of
# dropping the household.
wealth2022 <- wealth2022[
  !is.na(wealth2022$VEHW1000VERH) & wealth2022$VEHW1000VERH != 99999999999,
]

# Calculate wealth per capita in each household (household wealth divided by
# the INHAHL column)
wealth2022$Wealth_per_capita <- wealth2022$VEHW1000VERH / wealth2022$INHAHL

# Delete missing values of income. Missing income is coded as 9999999999; NA
# values are dropped for the same reason as above.
wealth2022 <- wealth2022[
  !is.na(wealth2022$INHBESTINKH) & wealth2022$INHBESTINKH != 9999999999,
]

# Calculate income per capita in each household
wealth2022$Income_per_capita <- wealth2022$INHBESTINKH / wealth2022$INHAHL

#### Wealth #### ---------------------------------------------------------------
# Sort the households from lowest to highest wealth per capita
wealth2022 <- arrange(wealth2022, Wealth_per_capita)

# Assign every household to one of 100 national wealth-per-capita groups
wealth2022 <- mutate(
  wealth2022,
  Wealth_per_capita_percentile = ntile(Wealth_per_capita, 100)
)

# The threshold of a percentile is the lowest wealth per capita found in it
percentile_min <- wealth2022 %>%
  group_by(Wealth_per_capita_percentile) %>%
  summarise(Threshold_wealth = min(Wealth_per_capita, na.rm = TRUE))

#### Income #### ---------------------------------------------------------------
# Sort the households from lowest to highest income per capita
wealth2022 <- arrange(wealth2022, Income_per_capita)

# Assign every household to one of 100 national income-per-capita groups
wealth2022 <- mutate(
  wealth2022,
  Income_per_capita_percentile = ntile(Income_per_capita, 100)
)

# The threshold of a percentile is the lowest income per capita found in it
percentile_min_income <- wealth2022 %>%
  group_by(Income_per_capita_percentile) %>%
  summarise(Threshold_income = min(Income_per_capita, na.rm = TRUE))

#### Repeat the process at the FUA level #### ----------------------------------

# Names of all possible FUAs, one per line.
# NOTE(review): this vector is not referenced in the visible part of the
# script; presumably it is used elsewhere -- confirm before removing.
cities <- c(
  "'s-Gravenhage",
  "'s-Hertogenbosch",
  "Alkmaar",
  "Almelo",
  "Alphen aan den Rijn",
  "Amersfoort",
  "Amsterdam",
  "Apeldoorn",
  "Arnhem",
  "Assen",
  "Bergen op Zoom",
  "Breda",
  "Deventer",
  "Ede",
  "Eindhoven",
  "Enschede",
  "Gouda",
  "Groningen",
  "Heerlen",
  "Leeuwarden",
  "Leiden",
  "Lelystad",
  "Maastricht",
  "Middelburg",
  "Nijmegen",
  "Oss",
  "Roosendaal",
  "Rotterdam",
  "Sittard-Geleen",
  "Soest",
  "Tilburg",
  "Utrecht",
  "Veenendaal",
  "Venlo",
  "Zwolle"
)

# Order values according to their wealth within every FUA
wealth2022 <- wealth2022 %>%
  arrange(FUA, Wealth_per_capita)

# Calculate the wealth percentile rank every household belongs to in their FUA.
# This overwrites the national-level Wealth_per_capita_percentile column.
# ungroup() drops the FUA grouping again so that later operations on
# wealth2022 do not silently run per FUA.
wealth2022 <- wealth2022 %>%
  group_by(FUA) %>%
  mutate(Wealth_per_capita_percentile = ntile(Wealth_per_capita, 100)) %>%
  ungroup()

# Calculate percentile threshold: the lowest wealth per capita observed in
# every FUA-by-percentile cell
percentile_min_FUA <- wealth2022 %>%
  group_by(FUA, Wealth_per_capita_percentile) %>%
  summarize(
    Threshold_wealth = min(Wealth_per_capita, na.rm = TRUE),
    .groups = "drop"
  )

#### Income #### ----------------------------------------------------------------

# Order values according to their income within every FUA
wealth2022 <- wealth2022 %>%
  arrange(FUA, Income_per_capita)

# Calculate the income percentile rank every household belongs to in their FUA.
# This overwrites the national-level Income_per_capita_percentile column.
# ungroup() drops the FUA grouping again so that wealth2022 is not left as a
# grouped data frame once this section has run.
wealth2022 <- wealth2022 %>%
  group_by(FUA) %>%
  mutate(Income_per_capita_percentile = ntile(Income_per_capita, 100)) %>%
  ungroup()

# Calculate percentile threshold: the lowest income per capita observed in
# every FUA-by-percentile cell
percentile_min_FUA_income <- wealth2022 %>%
  group_by(FUA, Income_per_capita_percentile) %>%
  summarize(
    Threshold_income = min(Income_per_capita, na.rm = TRUE),
    .groups = "drop"
  )

#### Merge #### ----------------------------------------------------------------

# National level: put the wealth and income thresholds side by side, matching
# on the percentile number, and call the shared key column "Percentile"
percentile_min <- merge(
  percentile_min,
  percentile_min_income,
  by.x = "Wealth_per_capita_percentile",
  by.y = "Income_per_capita_percentile"
) %>%
  rename(Percentile = Wealth_per_capita_percentile)

# FUA level: same join, but matching on FUA and percentile number together
percentile_min_FUA <- merge(
  percentile_min_FUA,
  percentile_min_FUA_income,
  by.x = c("FUA", "Wealth_per_capita_percentile"),
  by.y = c("FUA", "Income_per_capita_percentile")
) %>%
  rename(Percentile = Wealth_per_capita_percentile)

# Label the national rows so they can be told apart from the FUA rows
percentile_min[["FUA"]] <- "National_level"

# Stack the national and FUA thresholds into a single table
percentile_thresholds <- bind_rows(percentile_min, percentile_min_FUA)

#### Calculate the kind of wealth that every percentile has #### -----------------

# Reload the data
data <- read.csv("raw_data/database_2022.csv")

# Delete missing values of wealth. Missing wealth is coded as 99999999999.
# The is.na() guard matters: a bare `x != code` mask is NA where x is NA, and
# indexing a data frame with an NA logical inserts a row of NAs instead of
# dropping the household.
data <- data[
  !is.na(data$VEHW1000VERH) & data$VEHW1000VERH != 99999999999,
]

# Every per-capita figure below is a household total divided by INHAHL.
# Component figures are floored at zero with pmax(), so negative balances
# count as zero; total wealth per capita is the only one left unfloored.

# Total wealth per capita in each household
data$Wealth_per_capita <- with(data, VEHW1000VERH / INHAHL)

# Real estate wealth per capita
data$Real_estate_wealth_per_capita <- with(data, pmax(VEHW1120ONRH / INHAHL, 0))

# Movable wealth: household total first, then per capita
data$Movable_wealth <- with(
  data,
  VEHW1110FINH + VEHW1130ONDH + VEHW1140ABEH + VEHW1150OVEH
)
data$Movable_wealth_per_capita <- with(data, pmax(Movable_wealth / INHAHL, 0))

# Deposits and savings wealth per capita
data$Deposits_and_savings_per_capita <- with(
  data,
  pmax(VEHW1111BANH / INHAHL, 0)
)

# Entrepreneurial wealth per capita
data$Entrepenurial_wealth_per_capita <- with(
  data,
  pmax(VEHW1130ONDH / INHAHL, 0)
)

# Bonds and shares wealth: household total first, then per capita
data$Bonds_and_shares_wealth <- with(data, VEHW1112EFFH + VEHW1140ABEH)
data$Bonds_and_shares_wealth_per_capita <- with(
  data,
  pmax(Bonds_and_shares_wealth / INHAHL, 0)
)

# Other wealth per capita (e.g. cash)
data$Other_wealth_per_capita <- with(data, pmax(VEHW1150OVEH / INHAHL, 0))

# Significant shares wealth per capita
data$Significant_shares_wealth_per_capita <- with(
  data,
  pmax(VEHW1140ABEH / INHAHL, 0)
)

# Bonds wealth per capita
data$Bonds_wealth_per_capita <- with(data, pmax(VEHW1112EFFH / INHAHL, 0))

# Positive wealth only: the sum of the floored components, used as the
# denominator of every share below
data$All_wealth_per_capita <- with(
  data,
  Other_wealth_per_capita + Bonds_and_shares_wealth_per_capita +
    Entrepenurial_wealth_per_capita + Deposits_and_savings_per_capita +
    Real_estate_wealth_per_capita
)

# Share of real estate in the overall (positive) wealth
data$Share_real_estate <- with(
  data,
  Real_estate_wealth_per_capita / All_wealth_per_capita
)

# Share of financial wealth in the overall (positive) wealth
# NOTE(review): the numerator floors the summed movable wealth while the
# denominator sums individually floored components -- confirm this is intended.
data$Share_financial_wealth <- with(
  data,
  Movable_wealth_per_capita / All_wealth_per_capita
)

# Shares of the financial wealth subtypes
data$Share_deposits_and_savings_wealth <- with(
  data,
  Deposits_and_savings_per_capita / All_wealth_per_capita
)
data$Share_entrepenurial_wealth <- with(
  data,
  Entrepenurial_wealth_per_capita / All_wealth_per_capita
)
data$Share_bonds_and_shares_wealth <- with(
  data,
  Bonds_and_shares_wealth_per_capita / All_wealth_per_capita
)
data$Share_other_wealth <- with(
  data,
  Other_wealth_per_capita / All_wealth_per_capita
)
data$Share_only_significant_shares <- with(
  data,
  Significant_shares_wealth_per_capita / All_wealth_per_capita
)
data$Share_only_bonds <- with(
  data,
  Bonds_wealth_per_capita / All_wealth_per_capita
)

# Sort the households by wealth per capita and assign each one to one of 100
# national wealth-per-capita groups
data <- data %>%
  arrange(Wealth_per_capita) %>%
  mutate(Wealth_per_capita_percentile = ntile(Wealth_per_capita, 100))

# Average share of every type of wealth within each percentile. Households
# whose share is undefined (NA/NaN) are left out of the mean.
share_type_wealth_percentile <- data %>%
  group_by(Wealth_per_capita_percentile) %>%
  summarise(
    Real_estate_wealth_share =
      mean(Share_real_estate, na.rm = TRUE),
    Financial_wealth_share =
      mean(Share_financial_wealth, na.rm = TRUE),
    Deposits_and_savings_wealth_share =
      mean(Share_deposits_and_savings_wealth, na.rm = TRUE),
    Entrepenurial_wealth_share =
      mean(Share_entrepenurial_wealth, na.rm = TRUE),
    Share_and_bonds_wealth_share =
      mean(Share_bonds_and_shares_wealth, na.rm = TRUE),
    Other_wealth_share =
      mean(Share_other_wealth, na.rm = TRUE),
    Only_significant_shares_share =
      mean(Share_only_significant_shares, na.rm = TRUE),
    Only_bonds_and_non_significant_shares_share =
      mean(Share_only_bonds, na.rm = TRUE)
  )
