##### Calculating inequality indicators #####
# 01_load_and_process_data script
# 28/10/2024
#####

# load the required packages
library(haven)
library(ineq)
library(DescTools)
library(tidyverse)
library(OasisR)
library(seg)
library(sf)
library(sp)

# Print numbers in fixed notation instead of scientific notation
options(scipen = 999)

#### Load and prepare data for 2022 ####

# Read the 2022 database from disk (can be skipped when `data` is already
# in memory)
data <- read.csv(file = "raw_data/database_2022.csv")

# Drop the rows whose VEHW1000VERH holds the missing-value code 99999999999.
# According to the original author's note, removing the missing values of
# this variable removes the missing values of all variables.
data <- data[!(data[["VEHW1000VERH"]] == 99999999999), ]

#### Calculate different forms of inequality ####

# B - Unadjusted wealth: negative values are floored at zero before the
# Gini coefficient is computed
data[["VEHW1000VERH"]] <- pmax(data[["VEHW1000VERH"]], 0)
Gini_wealth_unadjusted <- ineq::Gini(data[["VEHW1000VERH"]])

# C - Unadjusted income: same treatment as wealth (floor at zero, then Gini)
data[["INHBESTINKH"]] <- pmax(data[["INHBESTINKH"]], 0)
Gini_income_unadjusted <- ineq::Gini(data[["INHBESTINKH"]])

# D - Wealth per capita: the floored wealth divided by INHAHL
data[["Wealth_per_capita"]] <- data[["VEHW1000VERH"]] / data[["INHAHL"]]
Gini_wealth_per_capita <- ineq::Gini(data[["Wealth_per_capita"]])

# E - Income per capita: the floored income divided by INHAHL
data[["Income_per_capita"]] <- data[["INHBESTINKH"]] / data[["INHAHL"]]
Gini_income_per_capita <- ineq::Gini(data[["Income_per_capita"]])

# F - Income adjusted for household size and composition (INHGESTINKH),
# floored at zero
data[["INHGESTINKH"]] <- pmax(data[["INHGESTINKH"]], 0)
Gini_income_adjusted <- ineq::Gini(data[["INHGESTINKH"]])

# G - Real estate wealth per capita: floor VEHW1120ONRH at zero, then divide
# by INHAHL
data[["VEHW1120ONRH"]] <- pmax(data[["VEHW1120ONRH"]], 0)
data[["Real_estate_wealth_per_capita"]] <-
  data[["VEHW1120ONRH"]] / data[["INHAHL"]]
Gini_real_estate_wealth_per_capita <-
  ineq::Gini(data[["Real_estate_wealth_per_capita"]])

# H - Movable wealth per capita: the four components are summed first, the
# total is divided by INHAHL, and only the per-capita result is floored at
# zero (the individual components are not floored)
data[["Movable_wealth"]] <- data[["VEHW1110FINH"]] +
  data[["VEHW1130ONDH"]] +
  data[["VEHW1140ABEH"]] +
  data[["VEHW1150OVEH"]]
data[["Movable_wealth_per_capita"]] <-
  pmax(data[["Movable_wealth"]] / data[["INHAHL"]], 0)
Gini_movable_wealth_per_capita <-
  ineq::Gini(data[["Movable_wealth_per_capita"]])

#### Aggregate all data ####

# Collect the seven national Gini coefficients in one table: one row per
# indicator, with its label and its 2022 value. The helper vectors live
# inside local() so that only Gini_national is created in the workspace.
Gini_national <- local({
  indicator_labels <- c(
    "Wealth_unadjusted",
    "Income_unadjusted",
    "Wealth_per_capita",
    "Income_per_capita",
    "Income_adjusted",
    "Real_estate_wealth_per_capita",
    "Movable_wealth_per_capita"
  )
  indicator_values <- c(
    Gini_wealth_unadjusted,
    Gini_income_unadjusted,
    Gini_wealth_per_capita,
    Gini_income_per_capita,
    Gini_income_adjusted,
    Gini_real_estate_wealth_per_capita,
    Gini_movable_wealth_per_capita
  )
  data.frame(Type_of_Gini = indicator_labels, Value_2022 = indicator_values)
})


#### Calculate the distribution of resources #### -------------------------------

## Wealth unadjusted
# Sort the households by unadjusted wealth and label each one with the
# percentile (1-100) it falls into
data <- data %>%
  arrange(VEHW1000VERH) %>%
  mutate(Percentile = ntile(VEHW1000VERH, 100))

# Per percentile: total unadjusted wealth held by its households, and that
# total as a share of the wealth summed over all percentiles
distribution_wealth_unadjusted <- data %>%
  group_by(Percentile) %>%
  summarise(Sum_wealth_unadjusted = sum(VEHW1000VERH)) %>%
  mutate(
    Share_wealth_unadjusted =
      Sum_wealth_unadjusted / sum(Sum_wealth_unadjusted)
  )

## Income unadjusted
# Sort the households by unadjusted income and label each one with the
# percentile (1-100) it falls into
data <- data %>%
  arrange(INHBESTINKH) %>%
  mutate(Percentile = ntile(INHBESTINKH, 100))

# Per percentile: total unadjusted income received by its households, and
# that total as a share of the income summed over all percentiles
distribution_income_unadjusted <- data %>%
  group_by(Percentile) %>%
  summarise(Sum_income_unadjusted = sum(INHBESTINKH)) %>%
  mutate(
    Share_income_unadjusted =
      Sum_income_unadjusted / sum(Sum_income_unadjusted)
  )

## Wealth per capita
# Sort the households by per capita wealth and label each one with the
# percentile (1-100) it falls into
data <- data %>%
  arrange(Wealth_per_capita) %>%
  mutate(Percentile = ntile(Wealth_per_capita, 100))

# Per percentile: total per capita wealth of its households, and that total
# as a share of the per capita wealth summed over all percentiles
distribution_wealth_per_capita <- data %>%
  group_by(Percentile) %>%
  summarise(Sum_wealth_per_capita = sum(Wealth_per_capita)) %>%
  mutate(
    Share_wealth_per_capita =
      Sum_wealth_per_capita / sum(Sum_wealth_per_capita)
  )

## Income per capita
# Sort the households by per capita income and label each one with the
# percentile (1-100) it falls into
data <- data %>%
  arrange(Income_per_capita) %>%
  mutate(Percentile = ntile(Income_per_capita, 100))

# Per percentile: total per capita income of its households, and that total
# as a share of the per capita income summed over all percentiles
distribution_income_per_capita <- data %>%
  group_by(Percentile) %>%
  summarise(Sum_income_per_capita = sum(Income_per_capita)) %>%
  mutate(
    Share_income_per_capita =
      Sum_income_per_capita / sum(Sum_income_per_capita)
  )

## Income adjusted for household size and composition
# Sort the households by adjusted income (INHGESTINKH) and label each one
# with the percentile (1-100) it falls into
data <- data %>%
  arrange(INHGESTINKH) %>%
  mutate(Percentile = ntile(INHGESTINKH, 100))

# Per percentile: total adjusted income of its households, and that total as
# a share of the adjusted income summed over all percentiles.
# The sum must use INHGESTINKH, the same variable the percentiles are built
# on; summing the unadjusted INHBESTINKH here would mix the two income
# definitions and misreport the adjusted-income distribution.
distribution_income_adjusted <- data %>%
  group_by(Percentile) %>%
  summarize(Sum_income_adjusted = sum(INHGESTINKH)) %>%
  mutate(Share_income_adjusted = Sum_income_adjusted / sum(Sum_income_adjusted))

## Real estate wealth
# Sort the households by per capita real estate wealth and label each one
# with the percentile (1-100) it falls into
data <- data %>%
  arrange(Real_estate_wealth_per_capita) %>%
  mutate(Percentile = ntile(Real_estate_wealth_per_capita, 100))

# Per percentile: total per capita real estate wealth of its households, and
# that total as a share of the amount summed over all percentiles
distribution_real_estate_per_capita <- data %>%
  group_by(Percentile) %>%
  summarise(
    Sum_real_estate_per_capita = sum(Real_estate_wealth_per_capita)
  ) %>%
  mutate(
    Share_real_estate_per_capita =
      Sum_real_estate_per_capita / sum(Sum_real_estate_per_capita)
  )

## Movable wealth
# Sort the households by per capita movable wealth and label each one with
# the percentile (1-100) it falls into
data <- data %>%
  arrange(Movable_wealth_per_capita) %>%
  mutate(Percentile = ntile(Movable_wealth_per_capita, 100))

# Per percentile: total per capita movable wealth of its households, and
# that total as a share of the amount summed over all percentiles
distribution_movable_wealth_per_capita <- data %>%
  group_by(Percentile) %>%
  summarise(
    Sum_movable_wealth_per_capita = sum(Movable_wealth_per_capita)
  ) %>%
  mutate(
    Share_movable_wealth_per_capita =
      Sum_movable_wealth_per_capita / sum(Sum_movable_wealth_per_capita)
  )

#### Aggregate all data ####
# Join the seven per-percentile tables into one wide table keyed on
# Percentile; the tables are merged one after another in the same order in
# which they were computed
distribution <- distribution_wealth_unadjusted %>%
  merge(distribution_income_unadjusted, by = "Percentile") %>%
  merge(distribution_wealth_per_capita, by = "Percentile") %>%
  merge(distribution_income_per_capita, by = "Percentile") %>%
  merge(distribution_income_adjusted, by = "Percentile") %>%
  merge(distribution_real_estate_per_capita, by = "Percentile") %>%
  merge(distribution_movable_wealth_per_capita, by = "Percentile")




