#### Preparation #### -----------------------------------------------------------

# Load libraries
library(tidyverse)
library(openxlsx)
library(tidyverse)
library(patchwork)
library(sf)
library(readxl)

# Avoid scientific notation when printing numbers.
# NOTE(review): 9999 replaces the former 999999 because recent R versions
# reportedly clamp 'scipen' to 9999 and warn on larger values -- confirm
# against ?options. Either value keeps printed numbers in fixed notation.
options(scipen = 9999)

# Load data
# `data` holds the segregation values; the code below reads its columns
# Variable, Year, FUA, Group and Value.
#data <- read_csv("processed_data/segregation_data_FUA_level.csv")
data <- read_excel("processed_data/segregation_data_FUA_level.xlsx")
# `data_Gini` is used at the end of the script to build cumulative shares.
data_Gini <- read.csv("processed_data/Distribution_data.csv")

# Binary entropy (in bits) of a proportion `p`, used as the weight of each
# income percentile.
#
# p: numeric vector of proportions (a single value works as before).
# Returns a numeric vector of the same length as `p` holding
# p * log2(1/p) + (1 - p) * log2(1/(1 - p)), with the value defined as 0
# where p is exactly 0 or 1.
#
# The function is vectorised: the former scalar `if (p == 0 || p == 1)` test
# fails for inputs longer than one element in R >= 4.3.
entropy_function <- function(p) {
  entropy <- p * log2(1 / p) + (1 - p) * log2(1 / (1 - p))
  # At the end points the formula evaluates to 0 * Inf = NaN; the entropy
  # there is 0 by definition.
  entropy[p %in% c(0, 1)] <- 0
  entropy
}

# Entropy weight for every whole percentile from 1 to 99 (no decimals).
# vapply() applies the function one percentile at a time and guarantees a
# plain numeric vector as the result.
percentiles <- 1:99
entropy_values <- vapply(
  percentiles / 100,
  entropy_function,
  FUN.VALUE = numeric(1)
)
df_entropy <- data.frame(
  Group = percentiles,
  Entropy = entropy_values
)


#### Wealth segregation at 500 m #### ------------------------------------------
# Select variable: wealth per capita measured at 500 m
data_wealth_500m <- data %>%
  filter(Variable == "Wealth_per_capita_500m")

# One result row is produced per (year, city) combination. Rows are stored in
# a preallocated list and bound together once after the loop; growing a data
# frame with rbind() on every iteration copies the whole frame each time.
wealth_500m_years <- unique(data_wealth_500m$Year)
wealth_500m_cities <- unique(data_wealth_500m$FUA)
wealth_500m_rows <- vector(
  "list",
  length(wealth_500m_years) * length(wealth_500m_cities)
)
wealth_500m_row_id <- 0L

# Calculate SROITI for each city and year
for (year in wealth_500m_years) {
  for (city in wealth_500m_cities) {

    # Filter data for the specific year and city
    SITI_year_city <- data_wealth_500m %>%
      filter(Year == year, FUA == city) %>%
      select(Group, Value)

    # Attach the entropy weight of each percentile group (inner join on Group)
    merged_data <- merge(df_entropy, SITI_year_city, by = "Group")

    # Approximate the integral by a sum with a fixed step of 1/99.
    # A combination without matching rows gives an empty merge, whose sum is
    # 0, so it still produces a row with SROITI = 0.
    dp <- 1/99
    integral_approx <- sum(merged_data$Entropy * merged_data$Value) * dp

    # Calculate SROITI using the formula
    SROITI <- 2 * log(2) * integral_approx

    # Store the result row
    wealth_500m_row_id <- wealth_500m_row_id + 1L
    wealth_500m_rows[[wealth_500m_row_id]] <- data.frame(
      City = city,
      Year = year,
      SROITI = SROITI
    )
  }
}

# Bind all rows at once (columns: City, Year, SROITI)
wealth_all_cities_years_SROITI <- bind_rows(wealth_500m_rows)



#### Wealth segregation at 4000 m #### -----------------------------------------
# Select variable: wealth per capita measured at 4000 m
data_wealth_4000m <- data %>%
  filter(Variable == "Wealth_per_capita_4000m")

# One result row is produced per (year, city) combination. Rows are stored in
# a preallocated list and bound together once after the loop; growing a data
# frame with rbind() on every iteration copies the whole frame each time.
wealth_4000m_years <- unique(data_wealth_4000m$Year)
wealth_4000m_cities <- unique(data_wealth_4000m$FUA)
wealth_4000m_rows <- vector(
  "list",
  length(wealth_4000m_years) * length(wealth_4000m_cities)
)
wealth_4000m_row_id <- 0L

# Calculate SROITI for each city and year
for (year in wealth_4000m_years) {
  for (city in wealth_4000m_cities) {

    # Filter data for the specific year and city
    SITI_year_city <- data_wealth_4000m %>%
      filter(Year == year, FUA == city) %>%
      select(Group, Value)

    # Attach the entropy weight of each percentile group (inner join on Group)
    merged_data <- merge(df_entropy, SITI_year_city, by = "Group")

    # Approximate the integral by a sum with a fixed step of 1/99.
    # A combination without matching rows gives an empty merge, whose sum is
    # 0, so it still produces a row with SROITI = 0.
    dp <- 1/99
    integral_approx <- sum(merged_data$Entropy * merged_data$Value) * dp

    # Calculate SROITI using the formula
    SROITI <- 2 * log(2) * integral_approx

    # Store the result row
    wealth_4000m_row_id <- wealth_4000m_row_id + 1L
    wealth_4000m_rows[[wealth_4000m_row_id]] <- data.frame(
      City = city,
      Year = year,
      SROITI = SROITI
    )
  }
}

# Bind all rows at once (columns: City, Year, SROITI)
wealth_all_cities_years_SROITI_4000m <- bind_rows(wealth_4000m_rows)






#### Income segregation at 500 m #### ----------------------------------------------
# Select variable: income per capita measured at 500 m
data_income_500m <- data %>%
  filter(Variable == "Income_per_capita_500m")

# One result row is produced per (year, city) combination. Rows are stored in
# a preallocated list and bound together once after the loop; growing a data
# frame with rbind() on every iteration copies the whole frame each time.
income_500m_years <- unique(data_income_500m$Year)
income_500m_cities <- unique(data_income_500m$FUA)
income_500m_rows <- vector(
  "list",
  length(income_500m_years) * length(income_500m_cities)
)
income_500m_row_id <- 0L

# Calculate SROITI for each city and year
for (year in income_500m_years) {
  for (city in income_500m_cities) {

    # Filter data for the specific year and city
    SITI_year_city <- data_income_500m %>%
      filter(Year == year, FUA == city) %>%
      select(Group, Value)

    # Attach the entropy weight of each percentile group (inner join on Group)
    merged_data <- merge(df_entropy, SITI_year_city, by = "Group")

    # Approximate the integral by a sum with a fixed step of 1/99.
    # A combination without matching rows gives an empty merge, whose sum is
    # 0, so it still produces a row with SROITI = 0.
    dp <- 1/99
    integral_approx <- sum(merged_data$Entropy * merged_data$Value) * dp

    # Calculate SROITI using the formula
    SROITI <- 2 * log(2) * integral_approx

    # Store the result row
    income_500m_row_id <- income_500m_row_id + 1L
    income_500m_rows[[income_500m_row_id]] <- data.frame(
      City = city,
      Year = year,
      SROITI = SROITI
    )
  }
}

# Bind all rows at once (columns: City, Year, SROITI)
income_all_cities_years_SROITI <- bind_rows(income_500m_rows)



#### Income segregation at 4000 m #### --------------------------------------------
# Select variable: income per capita measured at 4000 m
data_income_4000m <- data %>%
  filter(Variable == "Income_per_capita_4000m")

# One result row is produced per (year, city) combination. Rows are stored in
# a preallocated list and bound together once after the loop; growing a data
# frame with rbind() on every iteration copies the whole frame each time.
income_4000m_years <- unique(data_income_4000m$Year)
income_4000m_cities <- unique(data_income_4000m$FUA)
income_4000m_rows <- vector(
  "list",
  length(income_4000m_years) * length(income_4000m_cities)
)
income_4000m_row_id <- 0L

# Calculate SROITI for each city and year
for (year in income_4000m_years) {
  for (city in income_4000m_cities) {

    # Filter data for the specific year and city
    SITI_year_city <- data_income_4000m %>%
      filter(Year == year, FUA == city) %>%
      select(Group, Value)

    # Attach the entropy weight of each percentile group (inner join on Group)
    merged_data <- merge(df_entropy, SITI_year_city, by = "Group")

    # Approximate the integral by a sum with a fixed step of 1/99.
    # A combination without matching rows gives an empty merge, whose sum is
    # 0, so it still produces a row with SROITI = 0.
    dp <- 1/99
    integral_approx <- sum(merged_data$Entropy * merged_data$Value) * dp

    # Calculate SROITI using the formula
    SROITI <- 2 * log(2) * integral_approx

    # Store the result row
    income_4000m_row_id <- income_4000m_row_id + 1L
    income_4000m_rows[[income_4000m_row_id]] <- data.frame(
      City = city,
      Year = year,
      SROITI = SROITI
    )
  }
}

# Bind all rows at once (columns: City, Year, SROITI)
income_all_cities_years_SROITI_4000m <- bind_rows(income_4000m_rows)


#### Real estate segregation at 500 m #### ----------------------------------------------
# Select variable: real estate measured at 500 m
data_real_estate_500m <- data %>%
  filter(Variable == "Real_estate_500m")

# One result row is produced per (year, city) combination. Rows are stored in
# a preallocated list and bound together once after the loop; growing a data
# frame with rbind() on every iteration copies the whole frame each time.
real_estate_500m_years <- unique(data_real_estate_500m$Year)
real_estate_500m_cities <- unique(data_real_estate_500m$FUA)
real_estate_500m_rows <- vector(
  "list",
  length(real_estate_500m_years) * length(real_estate_500m_cities)
)
real_estate_500m_row_id <- 0L

# Calculate SROITI for each city and year
for (year in real_estate_500m_years) {
  for (city in real_estate_500m_cities) {

    # Filter data for the specific year and city
    SITI_year_city <- data_real_estate_500m %>%
      filter(Year == year, FUA == city) %>%
      select(Group, Value)

    # Attach the entropy weight of each percentile group (inner join on Group)
    merged_data <- merge(df_entropy, SITI_year_city, by = "Group")

    # Approximate the integral by a sum with a fixed step of 1/99.
    # A combination without matching rows gives an empty merge, whose sum is
    # 0, so it still produces a row with SROITI = 0.
    dp <- 1/99
    integral_approx <- sum(merged_data$Entropy * merged_data$Value) * dp

    # Calculate SROITI using the formula
    SROITI <- 2 * log(2) * integral_approx

    # Store the result row
    real_estate_500m_row_id <- real_estate_500m_row_id + 1L
    real_estate_500m_rows[[real_estate_500m_row_id]] <- data.frame(
      City = city,
      Year = year,
      SROITI = SROITI
    )
  }
}

# Bind all rows at once (columns: City, Year, SROITI)
real_estate_all_cities_years_SROITI <- bind_rows(real_estate_500m_rows)


#### Real estate segregation at 4000 m #### ----------------------------------------------
# Select variable: real estate measured at 4000 m
data_real_estate_4000m <- data %>%
  filter(Variable == "Real_estate_4000m")

# One result row is produced per (year, city) combination. Rows are stored in
# a preallocated list and bound together once after the loop; growing a data
# frame with rbind() on every iteration copies the whole frame each time.
real_estate_4000m_years <- unique(data_real_estate_4000m$Year)
real_estate_4000m_cities <- unique(data_real_estate_4000m$FUA)
real_estate_4000m_rows <- vector(
  "list",
  length(real_estate_4000m_years) * length(real_estate_4000m_cities)
)
real_estate_4000m_row_id <- 0L

# Calculate SROITI for each city and year
for (year in real_estate_4000m_years) {
  for (city in real_estate_4000m_cities) {

    # Filter data for the specific year and city
    SITI_year_city <- data_real_estate_4000m %>%
      filter(Year == year, FUA == city) %>%
      select(Group, Value)

    # Attach the entropy weight of each percentile group (inner join on Group)
    merged_data <- merge(df_entropy, SITI_year_city, by = "Group")

    # Approximate the integral by a sum with a fixed step of 1/99.
    # A combination without matching rows gives an empty merge, whose sum is
    # 0, so it still produces a row with SROITI = 0.
    dp <- 1/99
    integral_approx <- sum(merged_data$Entropy * merged_data$Value) * dp

    # Calculate SROITI using the formula
    SROITI <- 2 * log(2) * integral_approx

    # Store the result row
    real_estate_4000m_row_id <- real_estate_4000m_row_id + 1L
    real_estate_4000m_rows[[real_estate_4000m_row_id]] <- data.frame(
      City = city,
      Year = year,
      SROITI = SROITI
    )
  }
}

# Bind all rows at once (columns: City, Year, SROITI)
real_estate_all_cities_years_SROITI_4000m <- bind_rows(real_estate_4000m_rows)


#### Movable wealth segregation at 500 m #### ----------------------------------------------
# Select variable: movable wealth measured at 500 m
data_movable_wealth_500m <- data %>%
  filter(Variable == "Movable_wealth_500m")

# One result row is produced per (year, city) combination. Rows are stored in
# a preallocated list and bound together once after the loop; growing a data
# frame with rbind() on every iteration copies the whole frame each time.
movable_wealth_500m_years <- unique(data_movable_wealth_500m$Year)
movable_wealth_500m_cities <- unique(data_movable_wealth_500m$FUA)
movable_wealth_500m_rows <- vector(
  "list",
  length(movable_wealth_500m_years) * length(movable_wealth_500m_cities)
)
movable_wealth_500m_row_id <- 0L

# Calculate SROITI for each city and year
for (year in movable_wealth_500m_years) {
  for (city in movable_wealth_500m_cities) {

    # Filter data for the specific year and city
    SITI_year_city <- data_movable_wealth_500m %>%
      filter(Year == year, FUA == city) %>%
      select(Group, Value)

    # Attach the entropy weight of each percentile group (inner join on Group)
    merged_data <- merge(df_entropy, SITI_year_city, by = "Group")

    # Approximate the integral by a sum with a fixed step of 1/99.
    # A combination without matching rows gives an empty merge, whose sum is
    # 0, so it still produces a row with SROITI = 0.
    dp <- 1/99
    integral_approx <- sum(merged_data$Entropy * merged_data$Value) * dp

    # Calculate SROITI using the formula
    SROITI <- 2 * log(2) * integral_approx

    # Store the result row
    movable_wealth_500m_row_id <- movable_wealth_500m_row_id + 1L
    movable_wealth_500m_rows[[movable_wealth_500m_row_id]] <- data.frame(
      City = city,
      Year = year,
      SROITI = SROITI
    )
  }
}

# Bind all rows at once (columns: City, Year, SROITI)
movable_wealth_all_cities_years_SROITI <- bind_rows(movable_wealth_500m_rows)



#### Movable wealth segregation at 4000 m #### ----------------------------------------------
# Select variable: movable wealth measured at 4000 m
data_movable_wealth_4000m <- data %>%
  filter(Variable == "Movable_wealth_4000m")

# One result row is produced per (year, city) combination. Rows are stored in
# a preallocated list and bound together once after the loop; growing a data
# frame with rbind() on every iteration copies the whole frame each time.
movable_wealth_4000m_years <- unique(data_movable_wealth_4000m$Year)
movable_wealth_4000m_cities <- unique(data_movable_wealth_4000m$FUA)
movable_wealth_4000m_rows <- vector(
  "list",
  length(movable_wealth_4000m_years) * length(movable_wealth_4000m_cities)
)
movable_wealth_4000m_row_id <- 0L

# Calculate SROITI for each city and year
for (year in movable_wealth_4000m_years) {
  for (city in movable_wealth_4000m_cities) {

    # Filter data for the specific year and city
    SITI_year_city <- data_movable_wealth_4000m %>%
      filter(Year == year, FUA == city) %>%
      select(Group, Value)

    # Attach the entropy weight of each percentile group (inner join on Group)
    merged_data <- merge(df_entropy, SITI_year_city, by = "Group")

    # Approximate the integral by a sum with a fixed step of 1/99.
    # A combination without matching rows gives an empty merge, whose sum is
    # 0, so it still produces a row with SROITI = 0.
    dp <- 1/99
    integral_approx <- sum(merged_data$Entropy * merged_data$Value) * dp

    # Calculate SROITI using the formula
    SROITI <- 2 * log(2) * integral_approx

    # Store the result row
    movable_wealth_4000m_row_id <- movable_wealth_4000m_row_id + 1L
    movable_wealth_4000m_rows[[movable_wealth_4000m_row_id]] <- data.frame(
      City = city,
      Year = year,
      SROITI = SROITI
    )
  }
}

# Bind all rows at once (columns: City, Year, SROITI)
movable_wealth_all_cities_years_SROITI_4000m <- bind_rows(
  movable_wealth_4000m_rows
)







#### Calculate scale of segregation at the percentile level #### -------------------

# Pair the 500 m and 4000 m values of every (Group, Year, FUA) cell and take
# their ratio. After each merge, Value.x is the 500 m value and Value.y the
# 4000 m value, so the ratio is 4000 m over 500 m.
data_income_percentiles <- data_income_500m %>%
  merge(data_income_4000m, by = c("Group", "Year", "FUA")) %>%
  mutate(Ratio_income_segregation = Value.y / Value.x)

data_wealth_percentiles <- data_wealth_500m %>%
  merge(data_wealth_4000m, by = c("Group", "Year", "FUA")) %>%
  mutate(Ratio_wealth_segregation = Value.y / Value.x)

# Combine income and wealth, keep 2022 only and compute the net micro
# segregation (500 m value minus 4000 m value). The second merge adds another
# suffix to the clashing columns:
#   Value.x.x = income 500 m    Value.y.x = income 4000 m
#   Value.x.y = wealth 500 m    Value.y.y = wealth 4000 m
geographic_scale_data_percentiles <- data_income_percentiles %>%
  merge(data_wealth_percentiles, by = c("FUA", "Year", "Group")) %>%
  filter(Year == "2022") %>%
  mutate(
    Income_net_micro_segregation = Value.x.x - Value.y.x,
    Wealth_net_micro_segregation = Value.x.y - Value.y.y
  )

# Average every measure per percentile group (missing values ignored), then
# derive the net micro segregation from the averaged values.
geo_scale_percentiles_average <- geographic_scale_data_percentiles %>%
  group_by(Group) %>%
  summarize(
    Ratio_income = mean(Ratio_income_segregation, na.rm = TRUE),
    Ratio_wealth = mean(Ratio_wealth_segregation, na.rm = TRUE),
    Mean_segregation_income_500m = mean(Value.x.x, na.rm = TRUE),
    Mean_segregation_income_4000m = mean(Value.y.x, na.rm = TRUE),
    Mean_segregation_wealth_500m = mean(Value.x.y, na.rm = TRUE),
    Mean_segregation_wealth_4000m = mean(Value.y.y, na.rm = TRUE)
  ) %>%
  mutate(
    Income_net_micro_segregation =
      Mean_segregation_income_500m - Mean_segregation_income_4000m,
    Wealth_net_micro_segregation =
      Mean_segregation_wealth_500m - Mean_segregation_wealth_4000m
  )

# Net micro segregation per percentile group: income in blue, wealth in yellow
ggplot(geo_scale_percentiles_average, aes(x = Group)) +
  geom_point(aes(y = Income_net_micro_segregation), colour = "#0C7BDC") +
  geom_point(aes(y = Wealth_net_micro_segregation), colour = "#FFC20A")

# 4000 m / 500 m ratio per percentile group: income in blue, wealth in yellow
ggplot(geo_scale_percentiles_average, aes(x = Group)) +
  geom_point(aes(y = Ratio_income), colour = "#0C7BDC") +
  geom_point(aes(y = Ratio_wealth), colour = "#FFC20A")

# Natural logs of the averaged segregation values and, per scale, the gap
# between the income log and the wealth log.
Ignacio <- geo_scale_percentiles_average %>%
  mutate(
    Log_seg_income_500m = log(Mean_segregation_income_500m),
    Log_seg_income_4000m = log(Mean_segregation_income_4000m),
    Log_seg_wealth_500m = log(Mean_segregation_wealth_500m),
    Log_seg_wealth_4000m = log(Mean_segregation_wealth_4000m),
    Difference_500m = Log_seg_income_500m - Log_seg_wealth_500m,
    Difference_4000m = Log_seg_income_4000m - Log_seg_wealth_4000m
  )

# Income ratio minus wealth ratio for every percentile group
geo_scale_percentiles_average <- geo_scale_percentiles_average %>%
  mutate(Difference = Ratio_income - Ratio_wealth)

# Plot the difference between the two ratios
ggplot(geo_scale_percentiles_average) +
  geom_point(aes(x = Group, y = Difference))

#### Analysis of the geographical scale of real estate and financial wealth segregation #### ------

# Pair the 500 m and 4000 m values of every (Group, Year, FUA) cell and take
# their ratio. After each merge, Value.x is the 500 m value and Value.y the
# 4000 m value, so the ratio is 4000 m over 500 m.
data_real_estate_wealth_percentiles <- data_real_estate_500m %>%
  merge(data_real_estate_4000m, by = c("Group", "Year", "FUA")) %>%
  mutate(Ratio_real_estate_segregation = Value.y / Value.x)

data_financial_wealth_percentiles <- data_movable_wealth_500m %>%
  merge(data_movable_wealth_4000m, by = c("Group", "Year", "FUA")) %>%
  mutate(Ratio_financial_wealth_segregation = Value.y / Value.x)

# Combine both forms of wealth, keep 2022 only and compute the net micro
# segregation (500 m value minus 4000 m value). The second merge adds another
# suffix to the clashing columns:
#   Value.x.x = real estate 500 m        Value.y.x = real estate 4000 m
#   Value.x.y = financial wealth 500 m   Value.y.y = financial wealth 4000 m
geographic_scale_data_percentiles_forms_of_wealth <-
  data_real_estate_wealth_percentiles %>%
  merge(data_financial_wealth_percentiles, by = c("FUA", "Year", "Group")) %>%
  filter(Year == "2022") %>%
  mutate(
    Real_estate_net_micro_segregation = Value.x.x - Value.y.x,
    Financial_wealth_net_micro_segregation = Value.x.y - Value.y.y
  )

# Average every measure per percentile group (missing values ignored), then
# derive the net micro segregation from the averaged values.
particular_forms_of_wealth_scale_percentiles_average <-
  geographic_scale_data_percentiles_forms_of_wealth %>%
  group_by(Group) %>%
  summarize(
    Ratio_real_estate = mean(Ratio_real_estate_segregation, na.rm = TRUE),
    Ratio_financial_wealth = mean(Ratio_financial_wealth_segregation, na.rm = TRUE),
    Mean_segregation_real_estate_500m = mean(Value.x.x, na.rm = TRUE),
    Mean_segregation_real_estate_4000m = mean(Value.y.x, na.rm = TRUE),
    Mean_segregation_financial_wealth_500m = mean(Value.x.y, na.rm = TRUE),
    Mean_segregation_financial_wealth_4000m = mean(Value.y.y, na.rm = TRUE)
  ) %>%
  mutate(
    Real_estate_net_micro_segregation =
      Mean_segregation_real_estate_500m - Mean_segregation_real_estate_4000m,
    Financial_wealth_net_micro_segregation =
      Mean_segregation_financial_wealth_500m -
      Mean_segregation_financial_wealth_4000m
  )

# Net micro segregation per percentile group: real estate in blue, financial
# wealth in yellow
ggplot(particular_forms_of_wealth_scale_percentiles_average, aes(x = Group)) +
  geom_point(aes(y = Real_estate_net_micro_segregation), colour = "#0C7BDC") +
  geom_point(aes(y = Financial_wealth_net_micro_segregation), colour = "#FFC20A")

# Mean segregation at 4000 m per percentile group (same colour coding)
ggplot(particular_forms_of_wealth_scale_percentiles_average, aes(x = Group)) +
  geom_point(aes(y = Mean_segregation_real_estate_4000m), colour = "#0C7BDC") +
  geom_point(aes(y = Mean_segregation_financial_wealth_4000m), colour = "#FFC20A")

# 4000 m / 500 m ratio per percentile group (same colour coding)
ggplot(particular_forms_of_wealth_scale_percentiles_average, aes(x = Group)) +
  geom_point(aes(y = Ratio_real_estate), colour = "#0C7BDC") +
  geom_point(aes(y = Ratio_financial_wealth), colour = "#FFC20A")





#### Calculate scale of segregation at the city level #### -----------------------
# Pair the 500 m and 4000 m SROITI of every (City, Year) and take their ratio.
# After each merge, SROITI.x is the 500 m value and SROITI.y the 4000 m value,
# so the ratio is 4000 m over 500 m.
income_combined <- income_all_cities_years_SROITI %>%
  merge(income_all_cities_years_SROITI_4000m, by = c("City", "Year")) %>%
  mutate(Ratio_income_segregation = SROITI.y / SROITI.x)

wealth_combined <- wealth_all_cities_years_SROITI %>%
  merge(wealth_all_cities_years_SROITI_4000m, by = c("City", "Year")) %>%
  mutate(Ratio_wealth_segregation = SROITI.y / SROITI.x)

# Put income and wealth side by side and compute income ratio minus wealth
# ratio for every (City, Year).
geographic_scale_data <- income_combined %>%
  merge(wealth_combined, by = c("City", "Year")) %>%
  mutate(Difference = Ratio_income_segregation - Ratio_wealth_segregation)




#### Calculate relationship between segregation and old people ####

# Load data and shapefiles
grid_cell_data <- read.csv("processed_data/grid_cell_summary_500m.csv")
#cells <- st_read("processed_data/Grid_cells_The_Hague.shp")
cells <- st_read("processed_data/Grid_cells_data.shp") # Downloaded from here: https://www.cbs.nl/nl-nl/dossier/nederland-regionaal/geografische-data/kaart-van-500-meter-bij-500-meter-met-statistieken

# Remove the geometry column.
# The geometry column of an sf object is "sticky": select(-geometry) keeps it,
# so st_drop_geometry() is used to get a plain attribute table.
cells <- st_drop_geometry(cells)

# Merge: keep every row of grid_cell_data and attach the matching grid-cell
# attributes (left join on the grid-cell identifier)
cells <- merge(grid_cell_data, cells, by.x = "VRLVIERKANT500M", by.y = "crs28992re", all.x = TRUE, all.y = FALSE)

# Calculate the share of older people and of young adults among inhabitants.
# NOTE(review): the plot labels further down suggest aantal_i_4 + aantal_i_5
# are people over 45 and aantal_i_2 + aantal_i_3 people between 15 and 45 --
# confirm against the data dictionary of the shapefile.
# NOTE(review): aantal_inw (the denominator) is not checked against the
# -99997 placeholder used for the age counts below -- confirm whether it needs
# the same filter.
cells$Old <- (cells$aantal_i_5 + cells$aantal_i_4) / cells$aantal_inw
cells$Young_adults <- (cells$aantal_i_2 + cells$aantal_i_3) / cells$aantal_inw

# Delete rows where any of the four age counts is NA or not above -99997
# (filter() drops rows whose condition evaluates to NA)
cells <- cells %>%
  filter(
    aantal_i_5 > -99997,
    aantal_i_4 > -99997,
    aantal_i_2 > -99997,
    aantal_i_3 > -99997
  )

# Calculate correlation between age structure and wealth/income measures
cor(cells$median_ratio_wealth_income, cells$Old)
cor(cells$median_ratio_wealth_income, cells$Young_adults)
cor(cells$ratio_median_wealth_income_grid_cell, cells$Old)
# This call previously repeated the `Old` correlation from the line above;
# it now uses Young_adults, like every other Old / Young_adults pair here.
cor(cells$ratio_median_wealth_income_grid_cell, cells$Young_adults)
cor(cells$mean_wealth, cells$Old)
cor(cells$mean_income, cells$Old)
cor(cells$mean_income, cells$Young_adults)
cor(cells$mean_wealth, cells$Young_adults)

# Scatter plot with a linear fit (no confidence band): share of older people
# against the median wealth/income ratio of each grid cell
old <- ggplot(cells, aes(x = Old, y = median_ratio_wealth_income)) +
  geom_point(size = 0.2, color = "#FFC20A") +
  geom_smooth(method = "lm", se = FALSE, color = "#0C7BDC") +
  scale_x_continuous(labels = scales::percent) + # Format x-axis as %
  labs(
    x = "People over 45 (%)",
    y = "Median ratio between\n wealth and income"
  ) +
  theme(
    axis.title.x = element_text(size = 14, face = "bold"),
    axis.title.y = element_text(size = 14, face = "bold")
  )

# Same plot for the share of young adults
young <- ggplot(cells, aes(x = Young_adults, y = median_ratio_wealth_income)) +
  geom_point(size = 0.2, color = "#FFC20A") +
  geom_smooth(method = "lm", se = FALSE, color = "#0C7BDC") +
  scale_x_continuous(labels = scales::percent) + # Format x-axis as %
  labs(
    x = "People between 15 and 45 (%)",
    y = "Median ratio between\n wealth and income"
  ) +
  theme(
    axis.title.x = element_text(size = 14, face = "bold"),
    axis.title.y = element_text(size = 14, face = "bold")
  )

# Stack both plots vertically (patchwork)
old / young


# Sort the grid cells by their share of older people, split them into ten
# equally sized groups and average the cell-level median wealth/income ratio
# within each group. Note that the summary is a mean of the cell medians.
# Elderly_decile is then turned into a factor with levels 1 to 10.
old_people_classification <- cells %>%
  arrange(Old) %>%
  mutate(Elderly_decile = ntile(Old, 10)) %>%
  group_by(Elderly_decile) %>%
  summarize(Median_ratio_wealth_income = mean(median_ratio_wealth_income)) %>%
  mutate(Elderly_decile = factor(Elderly_decile, levels = 1:10))

# Create the bar plot (one bar per decile, y axis limited to 0-10)
ggplot(
  old_people_classification,
  aes(x = Elderly_decile, y = Median_ratio_wealth_income)
) +
  geom_col(width = 0.8, fill = "#FFC20A") +
  scale_x_discrete(labels = as.character(1:10)) + # Ensure x-axis shows 1 to 10 as labels
  labs(
    x = "Decile of grid cells depending\non their share of people over 45",
    y = "Median ratio between\nwealth and income"
  ) +
  theme(
    axis.title.x = element_text(size = 18, face = "bold"),
    axis.title.y = element_text(size = 18, face = "bold"),
    axis.text.x = element_text(size = 16) # Larger x-axis tick labels
  ) +
  ylim(0, 10)

#### Study migration #### ------------------------------------------------------

# Load data and shapefiles (read again from disk, so `cells` starts from the
# full shapefile here)
grid_cell_data <- read.csv("processed_data/grid_cell_summary_500m.csv")
#cells <- st_read("processed_data/Grid_cells_The_Hague.shp")
cells <- st_read("processed_data/Grid_cells_data.shp")

# Remove the geometry column.
# The geometry column of an sf object is "sticky": select(-geometry) keeps it,
# so st_drop_geometry() is used to get a plain attribute table.
cells <- st_drop_geometry(cells)

# Merge: keep every row of grid_cell_data and attach the matching grid-cell
# attributes (left join on the grid-cell identifier)
cells <- merge(grid_cell_data, cells, by.x = "VRLVIERKANT500M", by.y = "crs28992re", all.x = TRUE, all.y = FALSE)

# Calculate share of born abroad as the complement of `percentage`.
# NOTE(review): presumably `percentage` is the percentage of inhabitants not
# born abroad, on a 0-100 scale -- confirm against the shapefile's data
# dictionary.
cells$Share_born_abroad <- 100 - cells$percentage

# Delete rows where `percentage` is NA or not above -99997
# (filter() drops rows whose condition evaluates to NA)
cells <- cells %>%
  filter(percentage > -99997)

# Calculate correlation between the share of people born abroad and the
# wealth/income ratio
cor(cells$median_ratio_wealth_income, cells$Share_born_abroad)

# Sort the grid cells by their share of people born abroad, split them into
# ten equally sized groups and average the cell-level median wealth/income
# ratio within each group. Note that the summary is a mean of the cell
# medians. Born_abroad_decile is then turned into a factor with levels 1 to 10.
born_abroad_classification <- cells %>%
  arrange(Share_born_abroad) %>%
  mutate(Born_abroad_decile = ntile(Share_born_abroad, 10)) %>%
  group_by(Born_abroad_decile) %>%
  summarize(Median_ratio_wealth_income = mean(median_ratio_wealth_income)) %>%
  mutate(Born_abroad_decile = factor(Born_abroad_decile, levels = 1:10))

# Create the bar plot (one bar per decile, y axis limited to 0-10)
ggplot(
  born_abroad_classification,
  aes(x = Born_abroad_decile, y = Median_ratio_wealth_income)
) +
  geom_col(width = 0.8, fill = "#FFC20A") +
  scale_x_discrete(labels = as.character(1:10)) + # Ensure x-axis shows 1 to 10 as labels
  labs(
    x = "Decile of grid cells depending\non their share of people born abroad",
    y = "Median ratio between\nwealth and income"
  ) +
  theme(
    axis.title.x = element_text(size = 18, face = "bold"),
    axis.title.y = element_text(size = 18, face = "bold"),
    axis.text.x = element_text(size = 16) # Larger x-axis tick labels
  ) +
  ylim(0, 10)

#### With a violin plot #### ---------------------------------------------------
# Sort the cells by their share of people born abroad and label each one with
# its decile, stored as a factor with levels 1 to 10 so the violins appear in
# order.
cells <- cells %>%
  arrange(Share_born_abroad) %>%
  mutate(
    Born_abroad_decile = factor(ntile(Share_born_abroad, 10), levels = 1:10)
  )

# Create the violin plot: distribution of the cell-level median wealth/income
# ratio within each decile, all violins drawn with the same maximum width
ggplot(cells, aes(x = Born_abroad_decile, y = median_ratio_wealth_income)) +
  geom_violin(scale = "width", color = "black", fill = "#FFC20A") +
  scale_x_discrete(labels = as.character(1:10)) +
  # Optional layers, currently disabled:
  # geom_jitter(width = 0.2, size = 1, alpha = 0.5) +
  # geom_boxplot(width = 0.1, fill = "white") +
  # ylim(0, 10) +
  labs(
    x = "Decile of grid cells depending\non their share of people born abroad",
    y = "Median ratio between\nwealth and income"
  ) +
  theme(
    axis.title.x = element_text(size = 18, face = "bold"),
    axis.title.y = element_text(size = 18, face = "bold"),
    axis.text.x = element_text(size = 16)
  )



#### Delving into the growth of segregation #### -------------------------------
# Evolution of real estate segregation (500 m SROITI): 2022 value minus 2011
# value per city. After the merge, SROITI.x is 2022 and SROITI.y is 2011.
real_estate_2022 <- filter(real_estate_all_cities_years_SROITI, Year == 2022)
real_estate_2011 <- filter(real_estate_all_cities_years_SROITI, Year == 2011)

evolution_real_estate_segregation <- real_estate_2022 %>%
  merge(real_estate_2011, by = "City") %>%
  mutate(Difference_real_estate = SROITI.x - SROITI.y)

# Evolution of financial (movable) wealth segregation, computed the same way
financial_wealth_2022 <- filter(movable_wealth_all_cities_years_SROITI, Year == 2022)
financial_wealth_2011 <- filter(movable_wealth_all_cities_years_SROITI, Year == 2011)

evolution_financial_wealth_segregation <- financial_wealth_2022 %>%
  merge(financial_wealth_2011, by = "City") %>%
  mutate(Difference_financial_wealth = SROITI.x - SROITI.y)

# Put both evolutions side by side; Year.x and Year.y are part of the key so
# they are not duplicated in the result
evolution_all_kinds_of_wealth <- merge(
  evolution_real_estate_segregation,
  evolution_financial_wealth_segregation,
  by = c("City", "Year.x", "Year.y")
)

# Average change across cities for each form of wealth
mean(evolution_all_kinds_of_wealth$Difference_real_estate)
mean(evolution_all_kinds_of_wealth$Difference_financial_wealth)

#### Calculate cumulative shares of wealth ####

# Sort by percentile once, then accumulate the wealth and income shares so
# each row holds the running total up to and including its percentile.
data_Gini <- data_Gini %>%
  arrange(Percentile) %>%
  mutate(
    Cumulative_Share_wealth = cumsum(Share_wealth_per_capita),
    Cumulative_Share_income = cumsum(Share_income_per_capita)
  )

# Print the per-percentile wealth shares for inspection
data_Gini[["Share_wealth_per_capita"]]
