#### Preparation #### ----------------------------------------------

# Run first "01_" and "02_" files
  
# load packages
library(tidyverse)
library(patchwork)
library(ggrepel)
library(REAT)
library(scales)
library(ggplotify)   # To convert base plots into ggplot objects
library(cowplot)
library(readxl)

# Avoid scientific notation
# (a very large `scipen` penalty makes R prefer fixed over exponential notation when printing)
options(scipen = 999999)

# Load data
# All inputs are read through relative paths under "processed_data/", so the
# script must be run with the working directory set accordingly.
# NOTE(review): these files are presumably produced by the "01_"/"02_" scripts
# mentioned above -- confirm.
# `data` is the table filtered further below by Year, Variable and FUA.
data <- read_excel("processed_data/segregation_data_FUA_level.xlsx")
# Per-capita income / wealth columns used for the Lorenz curves (Figure 2)
data_Gini <- read.csv("processed_data/Distribution_data.csv")
# Asset-type shares per wealth percentile (Figure 3)
data_kinds_of_wealth <- read.csv("processed_data/share_type_wealth_percentile.csv")
# Mean / median per-capita wealth by age: everyone, born in the Netherlands, born abroad (Figure 4)
wealth_per_year_of_birth <- read_excel("processed_data/wealth_per_year_of_birth.xlsx")
wealth_per_year_of_birth_born_in_the_Netherlands <- read_excel("processed_data/wealth_per_year_of_birth_born_in_the_Netherlands.xlsx")
wealth_per_year_of_birth_born_abroad <- read_excel("processed_data/wealth_per_year_of_birth_born_abroad.xlsx")
# NOTE(review): not referenced in the part of the script reviewed here -- confirm it is used later
wealth_type_per_year_of_birth <- read_excel("processed_data/share_type_wealth_age_simplified.xlsx")

# Load percentiles thresholds
# NOTE(review): not referenced in the part of the script reviewed here -- confirm it is used later
thresholds_percentiles <- read_excel("processed_data/Thresholds_percentiles.xlsx")

#### Figure 1. Methodological examples #### ----------------------------------------------

# Keep only the Amsterdam rows of the percentile-level table.
# NOTE(review): `geographic_scale_data_percentiles` is not created in this
# script; presumably it is left in the workspace by the "01_"/"02_" files
# mentioned at the top -- confirm.
data_Amsterdam <- filter(geographic_scale_data_percentiles, FUA == "Amsterdam")

# Pieces shared by both panels: percentile x-axis and bold axis titles
figure1_x_axis <- scale_x_continuous(limits = c(1, 99), breaks = c(1, 25, 50, 75, 99))
figure1_bold_titles <- theme(axis.title.x = element_text(face = "bold"),
                             axis.title.y = element_text(face = "bold"))

# Panel A: three point series per percentile
# (columns Value.x.y, Wealth_net_micro_segregation and Value.y.y)
part1_figure1 <- ggplot(data_Amsterdam, aes(x = Group)) +
  geom_point(aes(y = Value.x.y), color = "#0C7BDC") +
  geom_point(aes(y = Wealth_net_micro_segregation), color = "#084C8D") +
  geom_point(aes(y = Value.y.y), color = "#FFC20A") +
  labs(x = "Percentile", y = "Segregation (Information Theory Index)") +
  figure1_x_axis +
  ylim(0, 0.2) +
  figure1_bold_titles

# Panel B: ratio column (Ratio_wealth_segregation) per percentile
part2_figure1 <- ggplot(data_Amsterdam, aes(x = Group, y = Ratio_wealth_segregation)) +
  geom_point(color = "#FFE066") +
  labs(x = "Percentile", y = "Degree of macro-scale\nsegregation (4000m/500m)") +
  ylim(0, 1) +
  figure1_bold_titles +
  figure1_x_axis

# Stack panel A on top of panel B (patchwork)
Figure_1 <- part1_figure1 / part2_figure1

# Export the two-panel figure at print quality
ggsave("Final_graphs/Figure_01_B.png", Figure_1,
       width = 8.0/1.4, height = 11.0/1.4, units = "in", dpi = 1200)

# Export panel A on its own
ggsave("Final_graphs/Figure_01_A.png", part1_figure1,
       width = 8.0/1.4, height = 5/1.4, units = "in", dpi = 1200)
  
#### Figure 2. Plot Lorenz curves #### ----------------------------------------------

# Save Lorenz curves to PNG
# Opens a PNG graphics device; every lorenz() call below draws into it until dev.off()
png("Final_graphs/Figure_02.png", width = 8, height = 5, units = "in", res = 1200)

# Plot the Lorenz curve for income per capita
# This first call (add.lc = FALSE) sets the axis labels, title, background and grid;
# the later calls pass add.lc = TRUE to draw onto the same plot.
lorenz(data_Gini$Sum_income_per_capita, 
       lcx = "Cumulative share of the population", 
       lcy = "Cumulative share of wealth/income", 
       lctitle = "Lorenz Curves of wealth and income",
       le.col = "#a9a9a9", 
         lc.col = "#FFC20A",
         lsize = 3.5, 
         ltype = "solid", 
         bg.col = "gray95", 
         bgrid = TRUE, 
         bgrid.col = "white", 
         bgrid.size = 2, 
         bgrid.type = "solid",
         lcg = FALSE, 
         lcgn = FALSE, 
         lcg.caption = NULL, 
         lcg.lab.x = 0, 
         lcg.lab.y = 1, 
         add.lc = FALSE, 
         plot.lc = TRUE)
  
# Add the Lorenz curve for wealth per capita
lorenz(data_Gini$Sum_wealth_per_capita, 
         lc.col = "#0C7BDC",
         lsize = 3.5, 
         #ltype = "dashed",  # Different line type for differentiation
         add.lc = TRUE)  # Add to the existing plot
  
# Add the third Lorenz curve for real estate wealth
lorenz(data_Gini$Sum_real_estate_per_capita, 
         lc.col = "#63A1F4",  # Choose a new color
         lsize = 3.5, 
         # ltype = "dotted",  # Optionally, use a different line type
         add.lc = TRUE)  # Add to the existing plot
  
# Add the fourth Lorenz curve for financial wealth
# (column Sum_movable_wealth_per_capita)
lorenz(data_Gini$Sum_movable_wealth_per_capita, 
         lc.col = "#084C8D",  # Choose a new color
         lsize = 3.5, 
         # ltype = "dotted",  # Optionally, use a different line type
         add.lc = TRUE)  # Add to the existing plot

# Finish and save
# Closing the device is what writes the PNG file. NOTE(review): if any lorenz()
# call above fails, this line is not reached and the device stays open.
dev.off()

#### Figure 3. Plot the kind of wealth at the percentile level with a stacked area plot #### -------------------------------------------------------------

# Convert data to long format for ggplot2:
# one row per (percentile, asset type), with the share in `Percentage`
data_figure_3 <- data_kinds_of_wealth %>%
    pivot_longer(cols = c(Real_estate_wealth_share,
                          Deposits_and_savings_wealth_share,
                          Entrepenurial_wealth_share,
                          Share_and_bonds_wealth_share,
                          Other_wealth_share),
                 names_to = "AssetType",
                 values_to = "Percentage")
  
# Reorder AssetType levels; the factor order drives both the stacking order
# of the areas and the order of the legend entries
data_figure_3$AssetType <- factor(data_figure_3$AssetType, 
                                    levels = c("Other_wealth_share", 
                                               "Entrepenurial_wealth_share", 
                                               "Deposits_and_savings_wealth_share", 
                                               "Share_and_bonds_wealth_share", 
                                               "Real_estate_wealth_share"))

# Plot  
Figure_03 <- ggplot(data_figure_3, aes(x = Wealth_per_capita_percentile, y = Percentage, fill = AssetType)) +
    # `linewidth` (not `size`) sets the outline width of areas since ggplot2 3.4.0;
    # `size` still works there but emits a deprecation warning
    geom_area(alpha = 0.8, linewidth = 0.5, colour = "white") +
    scale_fill_manual(values = c("Real_estate_wealth_share" = "#FFC20A",
                                 "Deposits_and_savings_wealth_share" = "#0b78cc", 
                                 "Entrepenurial_wealth_share" = "#085a99",
                                 "Share_and_bonds_wealth_share" = "#0e93ff",
                                 "Other_wealth_share" = "black"
                                 ),
                      labels = c(
                        "Other_wealth_share" = "Other forms of wealth (e.g. cash)",
                        "Entrepenurial_wealth_share" = "Professional assets needed by\nself-employed people",
                        "Deposits_and_savings_wealth_share" = "Deposits and savings",
                        "Share_and_bonds_wealth_share" = "Company shares and bonds",
                        "Real_estate_wealth_share" = "Real estate"
                      )) +
    scale_x_continuous(
      limits = c(0, 100),  # Set x-axis limits from 0 to 100
      breaks = c(1, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100),  # Define custom breaks
      labels = c("1", "10", "20", "30", "40", "50", "60", "70", "80", "90", "100")
    ) +
    scale_y_continuous(
      labels = label_percent(accuracy = 1)  # Format y-axis as percentages
    ) +
    labs(x = "Wealth Percentile", y = "Percentage of total wealth",
         title = "Asset composition by wealth percentile",
         fill = "Asset type") +
    theme_minimal() +
    theme(plot.title = element_text(face = "bold"), # Bold the title
          axis.title.y = element_text(face = "bold"), # Bold the y-axis label
          axis.title.x = element_text(face = "bold"), # Bold the x-axis label
          #legend.text = element_text(face = "bold"), # Make legend text bold
          panel.grid = element_blank(), # Remove all grid lines
          axis.line = element_line(color = "grey"), # Add axis lines
          legend.position = "bottom") +
    guides(fill = guide_legend(ncol = 3))  # Lay the legend out in three columns


# Save 
ggsave("Final_graphs/Figure_03.png", Figure_03,
       width = 9/1.4, height = 6/1.4, units = "in", dpi = 1200)

#### Figure 4. Evolution of wealth according to age and place of birth (data from 2022) ####

# Stack the mean and median columns into a single `wealth` column; `measure`
# records which one each row came from.
wealth_long <- pivot_longer(
  wealth_per_year_of_birth,
  cols = c(mean_per_capita_wealth, median_per_capita_wealth),
  names_to = "measure",
  values_to = "wealth"
)

# Turn `measure` into a labelled factor, median first (so it is also the first
# legend entry and receives the first colour / shape below)
wealth_long$measure <- factor(
  wealth_long$measure,
  levels = c("median_per_capita_wealth", "mean_per_capita_wealth"),
  labels = c("Median wealth (per capita)", "Mean wealth (per capita)")
)

# Colours and point shapes, in factor-level order (median, mean)
measure_colours <- c("#0C7BDC", "#FFC20A")
measure_shapes <- c(16, 17)  # 16 = circle, 17 = triangle

# Panel A: mean and median per-capita wealth by age
plot_A <- ggplot(wealth_long, aes(x = Age, y = wealth, color = measure, shape = measure)) +
  geom_point(size = 2) +
  geom_line() +
  scale_color_manual(values = measure_colours) +
  scale_shape_manual(values = measure_shapes) +
  scale_y_continuous(
    labels = label_currency(prefix = "€", big.mark = ",", decimal.mark = "."),
    limits = c(0, 300000)
  ) +
  xlim(18, 100) +
  labs(x = "Age of the head of the household", y = NULL, color = NULL, shape = NULL) +
  # show the matching shape inside each colour legend key
  guides(color = guide_legend(override.aes = list(shape = measure_shapes))) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold"),
    axis.title.x = element_text(face = "bold"),
    axis.line = element_line(color = "grey"),
    legend.position = "bottom"
  )


# Build one long table holding mean and median wealth by age for both origins.
# NOTE: this overwrites the `wealth_long` object created for plot_A above.

# Helper: one slice of the long table, taken from `source_table`, using
# `wealth_column` as the wealth measure and tagging rows with `type` / `origin`
stack_wealth_series <- function(source_table, wealth_column, type, origin) {
  data.frame(
    Age = source_table$Age,
    wealth = source_table[[wealth_column]],
    type = type,
    origin = origin
  )
}

# Row order: mean/abroad, mean/NL, median/abroad, median/NL
wealth_long <- rbind(
  stack_wealth_series(wealth_per_year_of_birth_born_abroad,
                      "mean_per_capita_wealth", "Mean wealth", "Born abroad"),
  stack_wealth_series(wealth_per_year_of_birth_born_in_the_Netherlands,
                      "mean_per_capita_wealth", "Mean wealth", "Born in NL"),
  stack_wealth_series(wealth_per_year_of_birth_born_abroad,
                      "median_per_capita_wealth", "Median wealth", "Born abroad"),
  stack_wealth_series(wealth_per_year_of_birth_born_in_the_Netherlands,
                      "median_per_capita_wealth", "Median wealth", "Born in NL")
)

# Grouping key of the form "<type> - <origin>"; it matches the names used in
# `colors` and `shapes` below
wealth_long$color_group <- paste(wealth_long$type, "-", wealth_long$origin)

# Colour for each type/origin combination
colors <- c(
  "Mean wealth - Born abroad"  = "#FFC20A",
  "Mean wealth - Born in NL"   = "#0C7BDC",
  "Median wealth - Born abroad" = "#FFE699",
  "Median wealth - Born in NL"  = "#085a99"
)

# Point shape for each combination: 16 for the mean series, 17 for the median series
shapes <- c(
  "Mean wealth - Born abroad"  = 16,
  "Mean wealth - Born in NL"   = 16,
  "Median wealth - Born abroad" = 17,
  "Median wealth - Born in NL"  = 17
)

# Median per-capita wealth by age, one slice per place of birth
median_born_abroad <- data.frame(
  Age = wealth_per_year_of_birth_born_abroad$Age,
  wealth = wealth_per_year_of_birth_born_abroad$median_per_capita_wealth,
  origin = "Born abroad"
)
median_born_in_nl <- data.frame(
  Age = wealth_per_year_of_birth_born_in_the_Netherlands$Age,
  wealth = wealth_per_year_of_birth_born_in_the_Netherlands$median_per_capita_wealth,
  origin = "Born in the Netherlands"
)
wealth_median_long <- rbind(median_born_abroad, median_born_in_nl)

# Colour per origin
colors_median <- c(
  "Born abroad" = "#FFC20A",
  "Born in the Netherlands"  = "#0C7BDC"
)

# Point shape per origin: triangle for born abroad, circle for born in the Netherlands
shapes_median <- c("Born abroad" = 17, "Born in the Netherlands" = 16)

# Panel B: median wealth by age, split by place of birth.
# Colour and shape are both mapped to `origin`, so the two legends merge into one.
plot_B <- ggplot(wealth_median_long, aes(x = Age, y = wealth, color = origin, shape = origin)) +
  geom_line() +
  geom_point(size = 2) +
  scale_color_manual(values = colors_median, name = NULL) +
  scale_shape_manual(values = shapes_median, name = NULL) +
  scale_y_continuous(
    labels = scales::label_currency(prefix = "€", big.mark = ",", decimal.mark = "."),
    limits = c(-1000, 300000)
  ) +
  labs(x = "Age of the head of the household", y = "Median wealth per capita") +
  xlim(18, 100) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold"),
    axis.title.x = element_text(face = "bold"),
    axis.title.y = element_text(face = "bold"),
    axis.line = element_line(color = "grey"),
    legend.position = "bottom"
  )

# Stack panel A above panel B (patchwork "/" operator)
Figure_04 <- plot_A / plot_B

# Export as high-resolution PNG
ggsave("Final_graphs/Figure_04.png", Figure_04,
       width = 8.0/1.2, height = 10/1.2, units = "in", dpi = 1200)

#### Figure 11. Plot at the same time income and wealth segregation per percentiles #### ----------------------------------------------

# Helper: rows of `data` for 2022 and a single value of `Variable`
# (the per-capita variables are used; the non-per-capita names such as
# "Wealth_500m" / "Income_500m" are the disabled alternative)
subset_2022 <- function(variable_name) {
  data %>%
    filter(Year == 2022, Variable == variable_name)
}

# Helper: pair the 500m rows (Value.x) with the 4000m rows (Value.y) of the same
# Group / FUA / Year and add Net_micro_segregation = Value.x - Value.y
net_micro_segregation <- function(rows_500m, rows_4000m) {
  paired <- merge(rows_500m, rows_4000m, by = c("Group", "FUA", "Year"))
  paired$Net_micro_segregation <- paired$Value.x - paired$Value.y
  paired
}

# Helper: mean of `value_column` per percentile (`Group`), ignoring missing values
mean_per_percentile <- function(rows, value_column) {
  rows %>%
    group_by(Group) %>%
    summarize(Average_Value = mean(.data[[value_column]], na.rm = TRUE))
}

# Helper: one panel with the three averaged series, each drawn as a grey line
# plus coloured points (500m, then 4000m, then net micro segregation)
plot_average_segregation <- function(avg_500m, avg_4000m, avg_net, title, y_label) {
  series <- list(
    list(averages = avg_500m, colour = "#0C7BDC"),
    list(averages = avg_4000m, colour = "#FFC20A"),
    list(averages = avg_net, colour = "#084C8D")
  )
  panel <- ggplot()
  for (entry in series) {
    panel <- panel +
      geom_line(aes(x = Group, y = Average_Value), data = entry$averages, color = "grey") +
      geom_point(aes(x = Group, y = Average_Value), data = entry$averages, color = entry$colour)
  }
  panel +
    xlab("Percentile") + ylab(y_label) +
    ggtitle(title) +
    theme(axis.title.x = element_text(size = 12, face = "bold"),
          axis.title.y = element_text(size = 12, face = "bold"),
          plot.title = element_text(size = 12, face = "bold")) +
    ylim(0, 0.16) +
    scale_x_continuous(limits = c(1, 99), breaks = c(1, 25, 50, 75, 99))
}

# Wealth, 2022: 500m radius (1), 4000m radius (2) and their difference (3)
data_2022_wealth_1 <- subset_2022("Wealth_per_capita_500m")
data_2022_wealth_2 <- subset_2022("Wealth_per_capita_4000m")
data_2022_wealth_3_a <- data_2022_wealth_1
data_2022_wealth_3_b <- data_2022_wealth_2
data_2022_wealth_3 <- net_micro_segregation(data_2022_wealth_3_a, data_2022_wealth_3_b)

# Average across FUAs for each percentile
average_2022_wealth_1 <- mean_per_percentile(data_2022_wealth_1, "Value")
average_2022_wealth_2 <- mean_per_percentile(data_2022_wealth_2, "Value")
average_2022_wealth_3 <- mean_per_percentile(data_2022_wealth_3, "Net_micro_segregation")

# Wealth panel (no y-axis title: it sits to the right of the income panel)
wealth_segregation <- plot_average_segregation(
  average_2022_wealth_1, average_2022_wealth_2, average_2022_wealth_3,
  title = "Average wealth segregation",
  y_label = NULL
)

# Income, 2022: 500m radius (1), 4000m radius (2) and their difference (3)
data_2022_income_1 <- subset_2022("Income_per_capita_500m")
data_2022_income_2 <- subset_2022("Income_per_capita_4000m")
data_2022_income_3_a <- data_2022_income_1
data_2022_income_3_b <- data_2022_income_2
data_2022_income_3 <- net_micro_segregation(data_2022_income_3_a, data_2022_income_3_b)

# Average across FUAs for each percentile
average_2022_income_1 <- mean_per_percentile(data_2022_income_1, "Value")
average_2022_income_2 <- mean_per_percentile(data_2022_income_2, "Value")
average_2022_income_3 <- mean_per_percentile(data_2022_income_3, "Net_micro_segregation")

# Income panel (carries the shared y-axis title)
income_segregation <- plot_average_segregation(
  average_2022_income_1, average_2022_income_2, average_2022_income_3,
  title = "Average income segregation",
  y_label = "Segregation (Spatial \nInformation Theory Index)"
)

# Put the panels side by side (patchwork "|") and display the result
Figure_11 <- income_segregation | wealth_segregation
Figure_11

# Save as a file
ggsave("Final_graphs/Figure_11.png", Figure_11,
       width = 8.0/1.2, height = 6/1.2, units = "in", dpi = 1200)
  
#### Figure 7. Plot income and wealth segregation in the same graph, but only at a radius of 500m net of 4000m #### ---------

# NOTE(review): the section title says "net of 4000m", but the series plotted are
# average_2022_income_1 / average_2022_wealth_1 (the 500m averages, consistent
# with the y-axis label) rather than the net series (*_3) -- confirm which is intended.

# Text sizes used for the axis titles and tick labels
figure7_title_text <- element_text(size = 14, face = "bold")
figure7_tick_text <- element_text(size = 12)

# Income (yellow points) and wealth (blue points), each on a grey connecting line
Figure_7 <- ggplot(mapping = aes(x = Group, y = Average_Value)) +
  geom_line(data = average_2022_income_1, color = "grey") +
  geom_point(data = average_2022_income_1, color = "#FFC20A", size = 2) +
  geom_line(data = average_2022_wealth_1, color = "grey") +
  geom_point(data = average_2022_wealth_1, color = "#0C7BDC", size = 2) +
  labs(x = "Percentile",
       y = "Segregation (Spatial Information\nTheory Index, radius = 500m)") +
  theme(axis.title.x = figure7_title_text,
        axis.title.y = figure7_title_text,
        axis.text.x = figure7_tick_text,
        axis.text.y = figure7_tick_text) +
  ylim(0, 0.16) +
  scale_x_continuous(limits = c(1, 99), breaks = c(1, 25, 50, 75, 99))

# Display
Figure_7

# Save as a file
ggsave("Final_graphs/Figure_7.png", Figure_7,
       width = 8.0/1.2, height = 6/1.2, units = "in", dpi = 1200)

  
  #### Figure 5. Difference when using income and wealth in net micro segregation #### -----------------

  # Pair the per-percentile net micro segregation averages of wealth
  # (Average_Value.x) and income (Average_Value.y), then take wealth minus income
  comparison_percentiles_500m <- merge(average_2022_wealth_3, average_2022_income_3, by = "Group") %>%
    mutate(Difference = Average_Value.x - Average_Value.y)

  # Bar per percentile, coloured by the sign of the difference (legend hidden)
  ggplot(comparison_percentiles_500m,
         aes(x = Group, y = Difference, fill = Difference > 0)) +
    geom_col() +
    scale_fill_manual(values = c("TRUE" = "#0C7BDC", "FALSE" = "#FFC20A")) +
    labs(x = "Percentiles", y = "Difference in SITI", fill = "Positive Difference") +
    theme(legend.position = "none")
  
  #### Calculate ratios #### -----------------------------------------------------

  # Merge databases: after the merge, Average_Value.x is the first table's
  # average (500m series) and Average_Value.y the second's (4000m series)
  ratio_wealth_2022 <- merge(average_2022_wealth_1, average_2022_wealth_2, by = "Group")
  ratio_income_2022 <- merge(average_2022_income_1, average_2022_income_2, by = "Group")

  # Calculate ratio (4000m / 500m)
  ratio_income_2022$Ratio <- ratio_income_2022$Average_Value.y/ratio_income_2022$Average_Value.x
  ratio_wealth_2022$Ratio <- ratio_wealth_2022$Average_Value.y/ratio_wealth_2022$Average_Value.x

  # Calculate difference (4000m - 500m)
  ratio_income_2022$Difference <- ratio_income_2022$Average_Value.y-ratio_income_2022$Average_Value.x
  ratio_wealth_2022$Difference <- ratio_wealth_2022$Average_Value.y-ratio_wealth_2022$Average_Value.x

  #### Check -- Plot
  # The colours are set as constants OUTSIDE aes(). Inside aes() a hex string is
  # treated as a data value: the points get ggplot's default palette and the
  # legend shows the hex codes as labels.
  ggplot() +
    geom_point(data = ratio_wealth_2022, aes(x = Group, y = Ratio), color = "#FFC20A") +
    geom_point(data = ratio_income_2022, aes(x = Group, y = Ratio), color = "#0C7BDC")

  ggplot() +
    geom_point(data = ratio_wealth_2022, aes(x = Group, y = Difference), color = "#FFC20A") +
    geom_point(data = ratio_income_2022, aes(x = Group, y = Difference), color = "#0C7BDC") +
    ylim(-0.13, 0)


  # Plot both ratios with a legend: here the colour IS mapped (to a series
  # label) and translated to hex colours by scale_color_manual()
  ggplot() +
    geom_line(aes(x = Group, y = Ratio, color = "Income Segregation"), data = ratio_income_2022) +
    geom_point(aes(x = Group, y = Ratio, color = "Income Segregation"), data = ratio_income_2022) +
    geom_line(aes(x = Group, y = Ratio, color = "Wealth Segregation"), data = ratio_wealth_2022) +
    geom_point(aes(x = Group, y = Ratio, color = "Wealth Segregation"), data = ratio_wealth_2022) +
    xlab("Percentile") +
    ylab("Degree of macro-scale\n segregation (4000m/500m)") +
    #ggtitle("Geographical scale of segregation") +
    scale_color_manual(values = c("Income Segregation" = "#FFC20A", "Wealth Segregation" = "#0C7BDC")) +
    theme(
      legend.position = "bottom",
      legend.title = element_blank(),
      axis.title.y = element_text(face = "bold", size = 16), # Increased font size
      axis.title.x = element_text(face = "bold", size = 16), # Increased font size
      axis.text = element_text(size = 16), # Increased size for axis text
      legend.text = element_text(size = 16)) # Increased size for legend text
  
  #### Study Amsterdam ####
  # All Amsterdam rows of `data`, then the 2022 subset.
  # NOTE: this replaces the `data_Amsterdam` object built for Figure 1.
  data_Amsterdam <- filter(data, FUA == "Amsterdam")
  data_Amsterdam_2022 <- filter(data_Amsterdam, Year == "2022")

  # 500m per-capita series for wealth and for income
  data_Amsterdam_2022_wealth <- filter(data_Amsterdam_2022, Variable == "Wealth_per_capita_500m")
  data_Amsterdam_2022_income <- filter(data_Amsterdam_2022, Variable == "Income_per_capita_500m")

  # Wealth (yellow points) and income (blue points) by percentile.
  # NOTE(review): Figure 7 uses the opposite colours (income yellow, wealth blue)
  # -- confirm the colours here are intended.
  ggplot(mapping = aes(x = Group, y = Value)) +
    geom_line(data = data_Amsterdam_2022_wealth, color = "grey") +
    geom_point(data = data_Amsterdam_2022_wealth, color = "#FFC20A") +
    geom_line(data = data_Amsterdam_2022_income, color = "grey") +
    geom_point(data = data_Amsterdam_2022_income, color = "#0C7BDC") +
    labs(x = "Percentile",
         y = "Segregation (Spatial \nInformation Theory Index)",
         title = "Economic segregation in Amsterdam")
  
  
  #### Compare wealth segregation of 2022 with wealth segregation of 2011 ####

  # Fill colours keyed by the sign test `Average_difference > 0`.
  # Naming the values pins each colour to its logical level; with unnamed values
  # ggplot assigns colours by position, so a plot where every bar has the same
  # sign would use the first colour whatever the sign is, and the two stacked
  # panels could end up with inconsistent colours.
  evolution_fill_colours <- c("FALSE" = "#0C7BDC", "TRUE" = "#FFC20A")

  # Filter data for the most recent year (2022) and for wealth at 500m radius
  data_2022_wealth <- data %>%
    filter(Year == 2022) %>%
    filter(Variable == "Wealth_per_capita_500m")
  #filter(Variable == "Wealth_500m")

  # Filter data for 2011 and for wealth at 500m radius
  data_2011_wealth <- data %>%
    filter(Year == 2011) %>%
    filter(Variable == "Wealth_per_capita_500m")
  #filter(Variable == "Wealth_500m")

  # Merge dataframes: Value.x is the 2011 value, Value.y the 2022 value
  wealth_segregation_comparison <- merge(data_2011_wealth, data_2022_wealth, by = c("FUA", "Group"))

  # Calculate difference (2022 minus 2011)
  wealth_segregation_comparison$Difference <- wealth_segregation_comparison$Value.y - wealth_segregation_comparison$Value.x

  # Average the change across FUAs for each percentile
  average_wealth_evolution <- wealth_segregation_comparison %>%
    group_by(Group) %>%       # Group data by the "Group" column, which represents percentiles
    summarize(Average_difference = mean(Difference, na.rm = TRUE))  # Calculate the mean value for each group


  # Plot: one bar per percentile, coloured by the sign of the change
  evolution_wealth <- ggplot() +
    geom_col(aes(x = Group, y = Average_difference, fill = Average_difference > 0), data = average_wealth_evolution) +
    scale_fill_manual(values = evolution_fill_colours) +
    xlab("Percentiles") + ylab("Change in segregation (2011-2022)") +
    ggtitle("Evolution of wealth segregation in Dutch FUAs") +
    theme(legend.position = "none") +
    ylim(-0.02, 0.05)

  #### Compare income segregation of 2022 with income segregation of 2011 ####
  # Filter data for the most recent year (2022) and for income at 500m radius
  data_2022_income <- data %>%
    filter(Year == 2022) %>%
    filter(Variable == "Income_per_capita_500m")
  #filter(Variable == "income_500m")

  # Filter data for 2011 and for income at 500m radius
  data_2011_income <- data %>%
    filter(Year == 2011) %>%
    filter(Variable == "Income_per_capita_500m")
  #filter(Variable == "income_500m")

  # Merge dataframes: Value.x is the 2011 value, Value.y the 2022 value
  income_segregation_comparison <- merge(data_2011_income, data_2022_income, by = c("FUA", "Group"))

  # Calculate difference (2022 minus 2011)
  income_segregation_comparison$Difference <- income_segregation_comparison$Value.y - income_segregation_comparison$Value.x

  # Average the change across FUAs for each percentile
  average_income_evolution <- income_segregation_comparison %>%
    group_by(Group) %>%       # Group data by the "Group" column, which represents percentiles
    summarize(Average_difference = mean(Difference, na.rm = TRUE))  # Calculate the mean value for each group

  # Plot: same layout, limits and colour coding as the wealth panel
  evolution_income <- ggplot() +
    geom_col(aes(x = Group, y = Average_difference, fill = Average_difference > 0), data = average_income_evolution) +
    scale_fill_manual(values = evolution_fill_colours) +
    xlab("Percentiles") + ylab("Change in segregation (2011-2022)") +
    ggtitle("Evolution of income segregation in Dutch FUAs") +
    theme(legend.position = "none") +
    ylim(-0.02, 0.05)

  #### Plot at the same time the evolution of income and wealth segregation ####
  evolution_income / evolution_wealth
  
  #### Compare static levels of income segregation and wealth segregation #### ---------

  # NOTE(review): `wealth_all_cities_years_SROITI` and
  # `income_all_cities_years_SROITI` are not created in this script; presumably
  # they come from the "01_"/"02_" files -- confirm.

  # Helper: bar chart of SROITI per city, cities sorted from highest to lowest
  sroiti_bar_plot <- function(sroiti_table) {
    ggplot(data = sroiti_table) +
      geom_col(aes(x = reorder(City, -SROITI), y = SROITI), fill = "#0C7BDC") +
      theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
      labs(x = "FUA", y = "SROITI (500 m radius)", title = "SROITI by FUA in 2022") +
      ylim(0, 0.15)
  }

  # 2022 rows and bar chart for wealth
  SROITI_2022_wealth <- filter(wealth_all_cities_years_SROITI, Year == 2022)
  wealth_SROITI <- sroiti_bar_plot(SROITI_2022_wealth)

  # 2022 rows and bar chart for income (same title and colour as the wealth chart)
  SROITI_2022_income <- filter(income_all_cities_years_SROITI, Year == 2022)
  income_SROITI <- sroiti_bar_plot(SROITI_2022_income)

  # Stack wealth and income rows, tagging each with its `Type`
  SROITI_2022_combined <- bind_rows(
    mutate(SROITI_2022_wealth, Type = "Wealth"),
    mutate(SROITI_2022_income, Type = "Income")
  )

  # Order the cities on the x-axis by decreasing wealth SROITI
  cities_by_wealth <- SROITI_2022_wealth$City[order(-SROITI_2022_wealth$SROITI)]
  SROITI_2022_combined$City <- factor(SROITI_2022_combined$City, levels = cities_by_wealth)

  # Dot plot: one point per city and type, with a grey segment joining the two
  # points of each city; legend below, no title
  ggplot(SROITI_2022_combined, aes(x = City, y = SROITI, group = Type, color = Type)) +
    geom_line(aes(group = City), color = "grey") +
    geom_point(size = 3) +
    scale_color_manual(values = c("Wealth" = "#0C7BDC", "Income" = "#FFC20A")) +
    theme(axis.text.x = element_text(angle = 45, hjust = 1),
          legend.position = "bottom") +
    ylim(0, 0.15) +
    labs(x = NULL, y = "Segregation (SROITI)")
  
  # Filter real estate data
  SROITI_2022_real_estate <- real_estate_all_cities_years_SROITI %>%
    filter(Year == 2022)

  # Filter movable wealth data (labelled "Financial wealth" below)
  SROITI_2022_movable_wealth <- movable_wealth_all_cities_years_SROITI %>%
    filter(Year == 2022)

  # Combine the data for wealth, income, real estate and financial wealth
  SROITI_2022_combined <- bind_rows(
    SROITI_2022_wealth %>% mutate(Type = "Wealth"),
    SROITI_2022_income %>% mutate(Type = "Income"),
    SROITI_2022_real_estate %>% mutate(Type = "Real estate"),
    SROITI_2022_movable_wealth %>% mutate(Type = "Financial wealth")
  )

  # Order the cities by decreasing movable-wealth SROITI ...
  SROITI_2022_combined <- SROITI_2022_combined %>%
    mutate(City = factor(City, levels = SROITI_2022_movable_wealth$City[order(-SROITI_2022_movable_wealth$SROITI)]))

  # ... then re-level by decreasing real-estate SROITI. This second ordering is
  # the one the plots below actually use.
  SROITI_2022_combined <- SROITI_2022_combined %>%
    mutate(City = factor(City, levels = SROITI_2022_real_estate$City[order(-SROITI_2022_real_estate$SROITI)]))

  # One colour per `Type`. All four types need an entry: a type missing from a
  # named manual scale is drawn in the NA colour (grey) and dropped from the
  # legend. Real estate and financial wealth reuse their Figure 2 colours.
  sroiti_type_colours <- c("Wealth" = "#0C7BDC",
                           "Income" = "#FFC20A",
                           "Real estate" = "#63A1F4",
                           "Financial wealth" = "#084C8D")

  # Plot: one point per city and type, grey segment joining the points of a city
  ggplot(data = SROITI_2022_combined, aes(x = City, y = SROITI, group = Type, color = Type)) +
    geom_line(aes(group = City), color = "grey") +
    geom_point(size = 3) +
    scale_color_manual(values = sroiti_type_colours) +
    theme(axis.text.x = element_text(angle = 45, hjust = 1),
          legend.position = "bottom") +
    ylim(0, 0.15) +
    labs(x = "FUA", y = "Segregation (SROITI)")


  # Same plot with dashed reference lines and annotations for the mean wealth
  # and mean income segregation.
  # NOTE(review): the two means are hard-coded; confirm they still match the data.
  # NOTE(review): the annotations only concern wealth and income; if only those
  # two types are meant to be shown here, filter SROITI_2022_combined first.
  ggplot(data = SROITI_2022_combined, aes(x = City, y = SROITI, group = Type, color = Type)) +
    geom_hline(yintercept = 0.08990379, linetype = "dashed", color = "#0C7BDC") +
    geom_hline(yintercept = 0.0326946, linetype = "dashed", color = "#FFC20A") + 
    geom_line(aes(group = City), color = "grey") +
    geom_point(size = 3) +
    scale_color_manual(values = sroiti_type_colours) +
    annotate("text", x = Inf, y = 0.08990379 + 0.002, label = "Mean wealth segregation: 0.09", 
             hjust = 1.1, vjust = -0.5, color = "#0C7BDC", size = 4) +
    annotate("text", x = -Inf, y = 0.0326946 - 0.005, label = "Mean income segregation: 0.03", 
             hjust = -0.1, vjust = 1.5, color = "#FFC20A", size = 4) +
    theme(axis.text.x = element_text(angle = 45, hjust = 1),
          legend.position = "bottom") +
    ylim(0, 0.15) +
    labs(x = NULL, y = "Segregation (SROITI)")
  
   
  
  #### Figure 4. Calculate correlation and plot scatterplots #### ----------------------------
  # NOTE(review): the wealth-by-age section of this script is also labelled
  # "Figure 4" -- confirm the figure numbering.

  # Build one row per City/Year holding the four SROITI measures side by side.
  # Each step merges the next table on City and Year, copies its SROITI column
  # to a descriptive name and drops the generic one so the next merge cannot clash.
  sroiti_keys <- c("City", "Year")

  # Wealth (SROITI.x) and income (SROITI.y)
  SROITI_2022_combined_2 <- merge(SROITI_2022_wealth, SROITI_2022_income, by = sroiti_keys) %>%
    mutate(SROITI_wealth = SROITI.x,
           SROITI_income = SROITI.y,
           SROITI.x = NULL,
           SROITI.y = NULL)

  # Real estate
  SROITI_2022_combined_2 <- merge(SROITI_2022_combined_2, SROITI_2022_real_estate, by = sroiti_keys) %>%
    mutate(SROITI_real_estate = SROITI,
           SROITI = NULL)

  # Movable wealth
  SROITI_2022_combined_2 <- merge(SROITI_2022_combined_2, SROITI_2022_movable_wealth, by = sroiti_keys) %>%
    mutate(SROITI_movable_wealth = SROITI,
           SROITI = NULL)
  
  # Helper: scatterplot of two SROITI measures with a linear fit and a
  # repelled city label next to every point.
  #   data     data frame holding the mapped columns and a `City` column
  #   mapping  aes() giving the x and y columns
  #   x_title  x-axis title
  #   y_title  y-axis title
  # Returns a ggplot object. Both axes are fixed to [0, 0.15] so that all
  # panels of the grid are directly comparable.
  plot_labelled_sroiti_correlation <- function(data, mapping, x_title, y_title) {
    ggplot(data = data, mapping = mapping) +
      ylim(0, 0.15) +
      xlim(0, 0.15) +
      geom_smooth(method = "lm", color = "#FFC20A", se = FALSE) +
      geom_point(color = "#0C7BDC") +
      geom_text_repel(aes(label = City),
                      size = 3,                  # Small text size
                      color = "black",
                      alpha = 0.5,               # Slight transparency
                      max.overlaps = Inf,        # Ensure all labels are shown
                      force = 5,                 # Stronger repelling force to push labels farther
                      segment.alpha = 0.2,       # Faint connector lines
                      box.padding = 0.5,         # Padding around labels
                      segment.curvature = -0.1,  # Add slight curvature to lines
                      segment.ncp = 3,           # Smooth line segments
                      max.time = 2) +            # Seconds allowed for resolving overlaps
      ylab(y_title) +
      xlab(x_title) +
      theme(axis.title.x = element_text(size = 14),
            axis.title.y = element_text(size = 14))
  }
  
  # Income against wealth, real estate and financial wealth
  correlation_1 <- plot_labelled_sroiti_correlation(
    SROITI_2022_combined_2,
    aes(y = SROITI_income, x = SROITI_wealth),
    x_title = "Wealth segregation (SROITI)",
    y_title = "Income segregation (SROITI)"
  )
  
  correlation_2 <- plot_labelled_sroiti_correlation(
    SROITI_2022_combined_2,
    aes(y = SROITI_income, x = SROITI_real_estate),
    x_title = "Real estate segregation (SROITI)",
    y_title = "Income segregation (SROITI)"
  )
  
  correlation_3 <- plot_labelled_sroiti_correlation(
    SROITI_2022_combined_2,
    aes(y = SROITI_income, x = SROITI_movable_wealth),
    x_title = "Financial wealth segregation (SROITI)",
    y_title = "Income segregation (SROITI)"
  )
  
  # Wealth against its two components
  correlation_4 <- plot_labelled_sroiti_correlation(
    SROITI_2022_combined_2,
    aes(y = SROITI_wealth, x = SROITI_real_estate),
    x_title = "Real estate segregation (SROITI)",
    y_title = "Wealth segregation (SROITI)"
  )
  
  correlation_5 <- plot_labelled_sroiti_correlation(
    SROITI_2022_combined_2,
    aes(y = SROITI_wealth, x = SROITI_movable_wealth),
    x_title = "Financial wealth segregation (SROITI)",
    y_title = "Wealth segregation (SROITI)"
  )
  
  # The two wealth components against each other
  correlation_6 <- plot_labelled_sroiti_correlation(
    SROITI_2022_combined_2,
    aes(y = SROITI_real_estate, x = SROITI_movable_wealth),
    x_title = "Financial wealth segregation (SROITI)",
    y_title = "Real estate segregation (SROITI)"
  )
  
  
  # Combine the plots into a single grid (2 rows of 3 panels)
  combined_plots <- (correlation_1 | correlation_2 | correlation_3) /
    (correlation_4 | correlation_5 | correlation_6)
  
  # Display the combined plot
  combined_plots
  
  # Save the combined plot as a PNG file with A4 dimensions.
  # NOTE: the "With no labels" section further down saves to the same file
  # name, so after a full run only that later version remains on disk.
  ggsave("combined_plots.png", plot = combined_plots, 
         width = 8.27, height = 11.69, units = "in", dpi = 300)
  
  
  
  #### With no labels #### --------------------------------------------------------
  
  # Helper: scatterplot of two SROITI measures with a linear fit and no
  # city labels.
  #   data     data frame holding the mapped columns
  #   mapping  aes() giving the x and y columns
  #   x_title  x-axis title (may contain "\n" to wrap)
  #   y_title  y-axis title (may contain "\n" to wrap)
  # Returns a ggplot object. Both axes are fixed to [0, 0.15] so that all
  # panels are directly comparable.
  plot_sroiti_correlation <- function(data, mapping, x_title, y_title) {
    ggplot(data = data, mapping = mapping) +
      ylim(0, 0.15) +
      xlim(0, 0.15) +
      geom_smooth(method = "lm", color = "#FFC20A", se = FALSE) +
      geom_point(color = "#0C7BDC") +
      ylab(y_title) +
      xlab(x_title) +
      theme(axis.title.x = element_text(size = 14, face = "bold"),
            axis.title.y = element_text(size = 14, face = "bold"))
  }
  
  # Plot 1
  correlation_1 <- plot_sroiti_correlation(
    SROITI_2022_combined_2,
    aes(y = SROITI_income, x = SROITI_wealth),
    x_title = "Wealth segregation\n(SROITI)",
    y_title = "Income segregation\n(SROITI)"
  )
  
  # Plot 2
  correlation_2 <- plot_sroiti_correlation(
    SROITI_2022_combined_2,
    aes(y = SROITI_income, x = SROITI_real_estate),
    x_title = "Real estate\nsegregation (SROITI)",
    y_title = "Income segregation\n(SROITI)"
  )
  
  # Plot 3
  correlation_3 <- plot_sroiti_correlation(
    SROITI_2022_combined_2,
    aes(y = SROITI_income, x = SROITI_movable_wealth),
    x_title = "Financial wealth\nsegregation (SROITI)",
    y_title = "Income segregation\n(SROITI)"
  )
  
  # Plot 4
  correlation_4 <- plot_sroiti_correlation(
    SROITI_2022_combined_2,
    aes(y = SROITI_wealth, x = SROITI_real_estate),
    x_title = "Real estate\nsegregation (SROITI)",
    y_title = "Wealth segregation\n(SROITI)"
  )
  
  # Plot 5
  correlation_5 <- plot_sroiti_correlation(
    SROITI_2022_combined_2,
    aes(y = SROITI_wealth, x = SROITI_movable_wealth),
    x_title = "Financial wealth\nsegregation (SROITI)",
    y_title = "Wealth segregation\n(SROITI)"
  )
  
  # Plot 6
  correlation_6 <- plot_sroiti_correlation(
    SROITI_2022_combined_2,
    aes(y = SROITI_real_estate, x = SROITI_movable_wealth),
    x_title = "Financial wealth\nsegregation (SROITI)",
    y_title = "Real estate segregation\n(SROITI)"
  )
  
  # Combine the plots into a single grid (2 rows of 3 panels)
  combined_plots <- (correlation_1 | correlation_2 | correlation_3) /
    (correlation_4 | correlation_5 | correlation_6)
  
  # Display the combined plot
  combined_plots
  
  # Save the combined plot as a PNG file with A4 dimensions
  ggsave("combined_plots.png", plot = combined_plots, 
         width = 8.27, height = 11.69, units = "in", dpi = 300)
  
  # Combine the plots into a grid with 2 columns and 3 rows
  combined_plots_V2 <- (correlation_1 + correlation_2 + 
                      correlation_3 + correlation_4 + 
                      correlation_5 + correlation_6) +
    plot_layout(ncol = 2, nrow = 3)
  
  # Save the combined plot as a PNG file with A4 dimensions
  ggsave("combined_plots_V2.png", plot = combined_plots_V2, 
         width = 8.27, height = 11.69, units = "in", dpi = 300)
  
  # Save each plot individually with square dimensions
  # (plot_1.png ... plot_6.png, in the order of the panels above)
  individual_plots <- list(correlation_1, correlation_2, correlation_3,
                           correlation_4, correlation_5, correlation_6)
  for (plot_index in seq_along(individual_plots)) {
    ggsave(paste0("plot_", plot_index, ".png"),
           plot = individual_plots[[plot_index]],
           width = 4, height = 4, units = "in", dpi = 300)
  }
  
  
  # Pairwise Pearson correlations between the four SROITI measures
  # (each value is printed when the line is run at top level)
  with(SROITI_2022_combined_2, cor(SROITI_wealth, SROITI_income))
  with(SROITI_2022_combined_2, cor(SROITI_income, SROITI_real_estate))
  with(SROITI_2022_combined_2, cor(SROITI_income, SROITI_movable_wealth))
  with(SROITI_2022_combined_2, cor(SROITI_wealth, SROITI_real_estate))
  with(SROITI_2022_combined_2, cor(SROITI_wealth, SROITI_movable_wealth))
  with(SROITI_2022_combined_2, cor(SROITI_real_estate, SROITI_movable_wealth))
  
  # R2 of each pair, obtained by squaring the correlation coefficient
  R2_wealth_income <- with(SROITI_2022_combined_2, cor(SROITI_wealth, SROITI_income))^2
  R2_income_real_estate <- with(SROITI_2022_combined_2, cor(SROITI_income, SROITI_real_estate))^2
  R2_income_movable_wealth <- with(SROITI_2022_combined_2, cor(SROITI_income, SROITI_movable_wealth))^2
  R2_wealth_real_estate <- with(SROITI_2022_combined_2, cor(SROITI_wealth, SROITI_real_estate))^2
  R2_wealth_movable_wealth <- with(SROITI_2022_combined_2, cor(SROITI_wealth, SROITI_movable_wealth))^2
  R2_real_estate_movable_wealth <- with(SROITI_2022_combined_2, cor(SROITI_real_estate, SROITI_movable_wealth))^2
  
  # Collect the six R2 values in one named vector and display it
  R2_values <- c(wealth_income = R2_wealth_income,
                 income_real_estate = R2_income_real_estate,
                 income_movable_wealth = R2_income_movable_wealth,
                 wealth_real_estate = R2_wealth_real_estate,
                 wealth_movable_wealth = R2_wealth_movable_wealth,
                 real_estate_movable_wealth = R2_real_estate_movable_wealth)
  print(R2_values)
  
  # Mean of each measure across cities
  with(SROITI_2022_combined_2, mean(SROITI_wealth))
  with(SROITI_2022_combined_2, mean(SROITI_real_estate))
  with(SROITI_2022_combined_2, mean(SROITI_movable_wealth))
  with(SROITI_2022_combined_2, mean(SROITI_income))
  
  # Standard deviation of each measure across cities
  with(SROITI_2022_combined_2, sd(SROITI_wealth))
  with(SROITI_2022_combined_2, sd(SROITI_real_estate))
  with(SROITI_2022_combined_2, sd(SROITI_movable_wealth))
  with(SROITI_2022_combined_2, sd(SROITI_income))
  
  
  
  
  
  
  # Wealth vs income scatterplot with city labels; this reassigns
  # correlation_1 (the object is stored, not displayed)
  correlation_1 <- ggplot(
    data = SROITI_2022_combined_2,
    mapping = aes(x = SROITI_wealth, y = SROITI_income)
  ) +
    lims(x = c(0, 0.15), y = c(0, 0.15)) +
    geom_smooth(method = "lm", color = "#FFC20A", se = FALSE) +
    geom_point(color = "#0C7BDC") +
    geom_text_repel(
      aes(label = City),
      size = 3,            # Small text size
      color = "black",
      alpha = 0.5,         # Slight transparency
      max.overlaps = 23
    ) +
    labs(x = "Wealth segregation (SROITI)",
         y = "Income segregation (SROITI)") +
    theme(axis.title.x = element_text(size = 14),
          axis.title.y = element_text(size = 14))
  
  # Correlations of wealth and income with the other measures
  with(SROITI_2022_combined_2, cor(SROITI_wealth, SROITI_income))
  with(SROITI_2022_combined_2, cor(SROITI_wealth, SROITI_real_estate))
  with(SROITI_2022_combined_2, cor(SROITI_wealth, SROITI_movable_wealth))
  with(SROITI_2022_combined_2, cor(SROITI_income, SROITI_real_estate))
  with(SROITI_2022_combined_2, cor(SROITI_income, SROITI_movable_wealth))
  
  # Wealth vs movable wealth, labelled by city (labels inherit x and y
  # from the plot-level mapping)
  ggplot(
    data = SROITI_2022_combined_2,
    mapping = aes(x = SROITI_wealth, y = SROITI_movable_wealth)
  ) +
    lims(x = c(0, 0.15), y = c(0, 0.15)) +
    geom_smooth(method = "lm", color = "#FFC20A", se = FALSE) +
    geom_point(color = "#0C7BDC") +
    geom_text_repel(aes(label = City), max.overlaps = 35) +
    labs(x = "Wealth segregation (SROITI)",
         y = "Movable wealth segregation (SROITI)")
  
  # Wealth vs real estate, labelled by city
  ggplot(
    data = SROITI_2022_combined_2,
    mapping = aes(x = SROITI_wealth, y = SROITI_real_estate)
  ) +
    lims(x = c(0, 0.15), y = c(0, 0.15)) +
    geom_smooth(method = "lm", color = "#FFC20A", se = FALSE) +
    geom_point(color = "#0C7BDC") +
    geom_text_repel(aes(label = City), max.overlaps = 35) +
    labs(x = "Wealth segregation (SROITI)",
         y = "Real estate segregation (SROITI)")
  
  
  #### Plot SROITI for 2011 ####
  # Keep only the 2011 rows of the wealth series
  SROITI_2011_wealth <- filter(wealth_all_cities_years_SROITI, Year == 2011)
  
  # Bar chart of wealth SROITI, cities sorted from highest to lowest
  ggplot(SROITI_2011_wealth, aes(x = reorder(City, -SROITI), y = SROITI)) +
    geom_col(fill = "#0C7BDC") +
    lims(y = c(0, 0.15)) +
    labs(x = "FUA", y = "SROITI (500 m radius)", title = "SROITI by FUA in 2011") +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))
  
  # Keep only the 2011 rows of the income series
  SROITI_2011_income <- filter(income_all_cities_years_SROITI, Year == 2011)
  
  # Bar chart of income SROITI, cities sorted from highest to lowest
  ggplot(SROITI_2011_income, aes(x = reorder(City, -SROITI), y = SROITI)) +
    geom_col(fill = "#0C7BDC") +
    lims(y = c(0, 0.15)) +
    labs(x = "FUA", y = "SROITI (500 m radius)", title = "SROITI by FUA in 2011") +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))
  
  #### Compare SROITI between 2011 and 2022 ####
  
  # Merge data: both inputs carry a SROITI column, so after the merge
  # SROITI.x holds the 2011 value and SROITI.y the 2022 value
  SROITI_comparison_wealth <- merge(SROITI_2011_wealth, SROITI_2022_wealth, by= "City")
  
  # Plot wealth: 2022 in blue, 2011 in yellow, cities sorted by the 2022 value.
  # `size` is a constant, so it is set outside aes(); inside aes() it would be
  # mapped through the size scale and add a meaningless legend entry.
  ggplot(data = SROITI_comparison_wealth, aes(x = reorder(City, -SROITI.y))) +
    geom_point(aes(y = SROITI.y), size = 3, color = "#0C7BDC") +
    geom_point(aes(y = SROITI.x), size = 3, color = "#FFC20A") +
    theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
    labs(x = "City", y = "SROITI", title = "SROITI Comparison: 2011 vs 2022")
  
  # Merge data (same layout: .x = 2011, .y = 2022)
  SROITI_comparison_income <- merge(SROITI_2011_income, SROITI_2022_income, by= "City")
  
  # Plot income: 2022 in blue, 2011 in yellow, cities sorted by the 2022 value
  ggplot(data = SROITI_comparison_income, aes(x = reorder(City, -SROITI.y))) +
    geom_point(aes(y = SROITI.y), size = 3, color = "#0C7BDC") +
    geom_point(aes(y = SROITI.x), size = 3, color = "#FFC20A") +
    theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
    labs(x = "City", y = "SROITI", title = "SROITI Comparison: 2011 vs 2022")
  
   
  # Change in SROITI per city: SROITI.y minus SROITI.x of each comparison table
  SROITI_comparison_wealth$Difference <- with(SROITI_comparison_wealth, SROITI.y - SROITI.x)
  SROITI_comparison_income$Difference <- with(SROITI_comparison_income, SROITI.y - SROITI.x)
  
  # Bar charts of the change, cities sorted from largest increase to largest
  # decrease; bars are blue where the difference is positive, yellow otherwise
  Difference_wealth <- ggplot(
    data = SROITI_comparison_wealth,
    mapping = aes(x = reorder(City, -Difference), y = Difference, fill = Difference > 0)
  ) +
    geom_col() +
    scale_fill_manual(values = c("TRUE" = "#0C7BDC", "FALSE" = "#FFC20A")) +
    labs(x = "FUA", y = "Difference in SROITI", title = "SROITI Difference by FUA (2022 vs 2011)", fill = "Positive Difference") +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))
  
  Difference_income <- ggplot(
    data = SROITI_comparison_income,
    mapping = aes(x = reorder(City, -Difference), y = Difference, fill = Difference > 0)
  ) +
    geom_col() +
    scale_fill_manual(values = c("TRUE" = "#0C7BDC", "FALSE" = "#FFC20A")) +
    labs(x = "FUA", y = "Difference in SROITI", title = "SROITI Difference by FUA (2022 vs 2011)", fill = "Positive Difference") +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))
  
  # Show the 2022 level and the change since 2011 in stacked panels.
  # Turn City into a factor ordered by descending SROITI.y so that every
  # panel built from the same table shares one x-axis order.
  SROITI_comparison_wealth$City <- with(SROITI_comparison_wealth, reorder(City, -SROITI.y))
  SROITI_comparison_income$City <- with(SROITI_comparison_income, reorder(City, -SROITI.y))
  
  # Yellow where the difference is positive, blue otherwise
  difference_palette <- c("TRUE" = "#FFC20A", "FALSE" = "#0C7BDC")
  
  # Level panels: one point per city, coloured by the sign of the change
  plot1 <- ggplot(SROITI_comparison_wealth,
                  aes(x = City, y = SROITI.y, colour = Difference > 0)) +
    geom_point(shape = 16, size = 3, show.legend = FALSE) +
    scale_colour_manual(values = difference_palette) +
    lims(y = c(0, 0.15)) +
    labs(x = NULL, y = "SROITI", title = "Wealth segregation in 2022") +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))
  
  plot1b <- ggplot(SROITI_comparison_income,
                   aes(x = City, y = SROITI.y, colour = Difference > 0)) +
    geom_point(shape = 16, size = 3, show.legend = FALSE) +
    scale_colour_manual(values = difference_palette) +
    lims(y = c(0, 0.15)) +
    labs(x = NULL, y = "SROITI", title = "Income segregation in 2022") +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))
  
  # Change panels: one bar per city; the x-axis text is hidden
  plot2 <- ggplot(SROITI_comparison_wealth,
                  aes(x = City, y = Difference, fill = Difference > 0)) +
    geom_col(show.legend = FALSE) +
    scale_fill_manual(values = difference_palette) +
    labs(x = NULL, y = "Difference in SROITI", title = "Differences in segregation (2022 vs 2011)") +
    theme(axis.text.x = element_blank())
  
  plot2b <- ggplot(SROITI_comparison_income,
                   aes(x = City, y = Difference, fill = Difference > 0)) +
    geom_col(show.legend = FALSE) +
    scale_fill_manual(values = difference_palette) +
    labs(x = NULL, y = "Difference in SROITI", title = "Differences in segregation (2022 vs 2011)") +
    theme(axis.text.x = element_blank())
  
  # Display level over change, separately for wealth and income
  plot1 / plot2
  plot1b / plot2b
  
  # Stack all four panels in a single column
  combined_plot <- wrap_plots(plot1, plot2, plot1b, plot2b, nrow = 4, ncol = 1)
  
  # Save the plot as a PNG with A4 dimensions (width and height in inches)
  ggsave("combined_plot.png", plot = combined_plot, width = 8.27, height = 11.69)
  
  # Alternatively, display the plot
  combined_plot
  
  #### Plot only evolution of SROITI values #### ---------------------------------
  
  # Order cities by the level of Difference in SROITI_comparison_wealth
  # (largest increase first)
  SROITI_comparison_wealth <- SROITI_comparison_wealth %>%
    dplyr::mutate(City = factor(City, levels = City[order(Difference, decreasing = TRUE)]))
  
  # Apply the same city order to the income dataset so both panels line up
  SROITI_comparison_income <- SROITI_comparison_income %>%
    dplyr::mutate(City = factor(City, levels = levels(SROITI_comparison_wealth$City)))
  
  # Plot wealth evolution (yellow = increase, blue = decrease or no change).
  # Difference is the 2022 value minus the 2011 value, so the title and the
  # axis label both refer to 2011-2022.
  evolution_wealth_segregation <- ggplot(data = SROITI_comparison_wealth) +
    geom_col(aes(x = City, y = Difference, fill = Difference > 0), 
             show.legend = FALSE) +
    theme(axis.text.x = element_text(angle = 45, hjust = 1),
          axis.title.y = element_text(size = 14, face = "bold"),
          plot.title = element_text(size = 16, face = "bold")) +
    scale_fill_manual(values = c("TRUE" = "#FFC20A", "FALSE" = "#0C7BDC")) +
    labs(x = NULL, y = "Difference in SROITI (2022-2011)", title = "Evolution of wealth segregation (2011-2022)")
  
  # Plot income evolution with the same styling and labelling convention
  evolution_income_segregation <- ggplot(data = SROITI_comparison_income) +
    geom_col(aes(x = City, y = Difference, fill = Difference > 0), 
             show.legend = FALSE) +
    theme(axis.text.x = element_text(angle = 45, hjust = 1),
          axis.title.y = element_text(size = 14, face = "bold"),
          plot.title = element_text(size = 16, face = "bold")) +
    scale_fill_manual(values = c("TRUE" = "#FFC20A", "FALSE" = "#0C7BDC")) +
    labs(x = NULL, y = "Difference in SROITI (2022-2011)", title = "Evolution of income segregation (2011-2022)")
  
  # Combine the plots vertically
  evolution_wealth_segregation / evolution_income_segregation
  
  #### Plot cities according to their wealth segregation ratio ####
  # 2022 wealth SROITI from the 4000 m series and from the default series
  SROITI_2022_4000m <- filter(wealth_all_cities_years_SROITI_4000m, Year == 2022)
  SROITI_2022 <- filter(wealth_all_cities_years_SROITI, Year == 2022)
  
  # One row per city: SROITI.x comes from SROITI_2022, SROITI.y from the
  # 4000 m table
  SROITI_ratio_2022 <- merge(SROITI_2022, SROITI_2022_4000m, by = c("City", "Year"))
  
  # Ratio of the 4000 m value to the default-series value
  SROITI_ratio_2022$Ratio <- with(SROITI_ratio_2022, SROITI.y / SROITI.x)
  
  # Cities ranked by ratio, lowest first
  plot3 <- ggplot(SROITI_ratio_2022, aes(x = reorder(City, Ratio), y = Ratio)) +
    geom_point(colour = "#0C7BDC", shape = 16, size = 3) +
    labs(x = NULL, y = "Ratio (Segregation at 4000m /\n Segregation at 500m)") +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))
  
  # Segregation at the two radii plotted against each other, labelled by city
  plot4 <- ggplot(SROITI_ratio_2022, aes(x = SROITI.x, y = SROITI.y)) +
    geom_point(colour = "#FFC20A", shape = 16, size = 3) +
    geom_text_repel(aes(label = City)) +
    labs(x = "Micro-scale segregation (500m)", y = "Macro-scale segregation (4000m)")
  
  # Display both panels stacked
  plot3 / plot4
  
#### Appendix ####

# Appendix figure: share of real estate and financial wealth by age.
# Each series is drawn as a line plus points; the constant strings inside
# aes() create the legend entries that scale_color_manual() colours.
  plot_2 <- ggplot(wealth_type_per_year_of_birth, aes(x = Age)) +
    geom_line(aes(y = Real_estate_wealth_share, color = "Real estate wealth")) +
    geom_point(aes(y = Real_estate_wealth_share, color = "Real estate wealth")) +
    geom_line(aes(y = Financial_wealth_share, color = "Financial wealth")) +
    geom_point(aes(y = Financial_wealth_share, color = "Financial wealth")) +
    scale_color_manual(
      name = "Wealth type",
      values = c("Real estate wealth" = "#FFC20A",
                 "Financial wealth" = "#0C7BDC")
    ) +
    # Show the y-axis values as whole percentages
    scale_y_continuous(labels = percent_format(accuracy = 1)) +
    labs(x = "Age", y = "Percentage of total wealth") +
    theme_minimal() +
    theme(
      plot.title = element_text(face = "bold"),
      axis.title.x = element_text(face = "bold"),
      axis.title.y = element_text(face = "bold"),
      axis.line = element_line(color = "grey"),
      legend.position = "bottom"
    )


# Save as high-resolution PNG (size in inches, 1200 dpi)
ggsave(
  "Final_graphs/Figure_1_appendix.png",
  plot = plot_2,
  width = 6.43,
  height = 4.09,
  dpi = 1200
)
  