##### Calculating wealth segregation taking into account wealth per capita #####
# 01_load_and_process_data script
# 23/06/2024
#####

# load the required packages
library(haven)
library(ineq)
library(DescTools)
library(tidyverse)
library(OasisR)
library(seg)
library(sf)
library(sp)

# Do not use scientific notation
options(scipen = 9999999)

#### Local environment set at a 500m radius ####
### Calculate Spatial Information Theory Index for wealth segregation in 2022 ###

# Load the data
wealth2022 <- read.csv("raw_data/database_2022.csv") # only if not already loaded

# Delete missing values of wealth
wealth2022 <- wealth2022[wealth2022$VEHW1000VERH != 99999999999, ]

# Calculate wealth per capita in each household
wealth2022$Wealth_per_capita <- (wealth2022$VEHW1000VERH/wealth2022$INHAHL)

# Define all possible FUAs
cities <- c("'s-Gravenhage", "'s-Hertogenbosch", "Alkmaar","Almelo", "Alphen aan den Rijn", "Amersfoort", "Amsterdam", "Apeldoorn", "Arnhem", "Assen", "Bergen op Zoom", "Breda", "Deventer", "Ede", "Eindhoven", "Enschede", "Gouda", "Groningen", "Heerlen", "Leeuwarden", "Leiden", "Lelystad", "Maastricht", "Middelburg", "Nijmegen", "Oss", "Roosendaal", "Rotterdam", "Sittard-Geleen", "Soest", "Tilburg", "Utrecht", "Veenendaal", "Venlo", "Zwolle")

# Order values according to their wealth within every FUA
wealth2022 <- wealth2022 %>%
  arrange(FUA, Wealth_per_capita)

# Calculate the wealth percentile rank every household belongs to in their FUA
wealth2022 <- wealth2022 %>%
  group_by(FUA) %>% 
  mutate(Wealth_per_capita_percentile = ntile(Wealth_per_capita, 100))

# Clean rows of missing data 
wealth2022 <- wealth2022 %>%
  filter(VRLVIERKANT100M != "----------")

# Correct coordinates to make sure they represent meters and not kilometers
wealth2022 <- wealth2022 %>%
  mutate(VRLVIERKANT100M = gsub("(E)(\\d+)(N)(\\d+)","\\1\\200\\3\\400", VRLVIERKANT100M))

# Loop over each city (sigma = 500m)
for (city in cities) {
  # Set the variable city for the given city
  City <- city
  
  # Re-start the original database
  df <- wealth2022
  
  # Create the dataframe out of the wealth database  for the FUA we are interested on
  df <- df %>%
    filter(FUA == City)
  
  # Keep only relevant columns
  df <- df %>%
    select(VRLVIERKANT100M, Wealth_per_capita_percentile)
  df$FUA <- NULL
  
  # create a pivot table
  pivot <- df %>%
    group_by(VRLVIERKANT100M, Wealth_per_capita_percentile) %>%
    summarise(Population = n()) %>%
    tidyr::pivot_wider(names_from = Wealth_per_capita_percentile, values_from = Population, values_fill = 0)
  
  # Create an element for only the coordinates of the grid cells
  grid_cells <- pivot %>%
    select(VRLVIERKANT100M)
  
  # Parse the coordinates
  grid_cells <- grid_cells %>%
    mutate(
      x = as.numeric(sub("E", "", str_extract(VRLVIERKANT100M, "E\\d+"))),
      y = as.numeric(sub("N", "", str_extract(VRLVIERKANT100M, "N\\d+"))),
    )
  
  # Delete superflous columns for coordinates
  grid_cells$VRLVIERKANT100M <- NULL
  
  # Create a spatial object for grid cells
  Spatial_grid_cells <- st_as_sf(grid_cells, coords = c("x", "y"), crs = 28992)
  coords <- st_coordinates(Spatial_grid_cells)
  
  # Create SpatialPoints object
  Spatial_grid_cells_sp <- SpatialPoints(coords, proj4string = CRS("+init=epsg:28992"))
  
  # Delete superflous columns for the income percentiles data
  pivot$VRLVIERKANT100M <- NULL
  
  # Order columns in the right order
  col_names <- as.numeric(colnames(pivot))
  
  # Re-order the dataframe
  pivot <- pivot[, order(col_names)]
  
  # Initialize a list to store each database for the income percentile pairwise calculation
  list_of_databases <- list()
  
  # Loop through each combination
  for(i in 1:(ncol(pivot)-1)) {
    # Calculate the cumulative sum of people up to the current percentile
    sumUpToCurrentPerc <- rowSums(pivot[, 1:i], na.rm = TRUE)
    
    # Calculate the sum of people above the current percentile
    sumAboveCurrentPerc <- rowSums(pivot[, (i+1):ncol(pivot)], na.rm = TRUE)
    
    # Create a new dataframe for the current percentile
    newData <- data.frame(sumUpToCurrentPerc = sumUpToCurrentPerc, sumAboveCurrentPerc = sumAboveCurrentPerc)
    
    # Store it in the list
    list_of_databases[[i]] <- newData
  }
  
  # Extract specific databases from it
  indices <- seq(1,99)
  for (i in indices) {
    database_name <- paste0("database_IncPer_", i)
    assign(database_name,list_of_databases[[i]])
  }
  
  # Initialize a list for storing SITI values
  SITI_results <- list()
  
  # Loop through the indices and calculate SITI results
  for (i in indices) {
    database_name <- paste0("database_IncPer_", i)
    SITI_results_name <- paste0("SITI_result", i)
    
    # Calculate SITI
    assign(SITI_results_name, spseg(x = Spatial_grid_cells_sp, data = get(database_name), method = "information", smoothing = "kernel", sigma = 500, useC = FALSE))
    
    # Store the result in the list
    SITI_results[[SITI_results_name]] <- get(SITI_results_name)
    
  }
  
  # Transform the results into something readable
  for (i in indices) {
    SITI_result_name <- paste0("SITI_result", i)
    
    #Transform the SITI result
    assign(SITI_result_name, get(SITI_result_name)@h)
  }
  
  
  # Prepare them for being saved and plotted
  combined_data_list <- list()
  for (i in indices) {
    SITI_result_name <- paste0("SITI_result", i)
    df <- data.frame(Value = get(SITI_result_name), Group = as.character(i))
    combined_data_list[[SITI_result_name]] <- df
  }
  
  # Make them be in a single database
  combined_data <- do.call(rbind, combined_data_list)
  
  # Convert the Group column to a factor with custom levels
  combined_data$Group <- factor(combined_data$Group, levels = as.character(indices))
  
  # Save the data
  write.csv(combined_data, file = paste0("Wealth_per_capita_segregation_500m_2022_", City, ".csv"), row.names = FALSE)
}

# Delete data for enabling further operations
rm(wealth2022)




### Calculate Spatial Information Theory Index for wealth segregation in 2021 ###

# Load the data
wealth2021 <- read.csv("raw_data/database_2021.csv") # only if not already loaded

# Delete missing values of wealth
wealth2021 <- wealth2021[wealth2021$VEHW1000VERH != 99999999999, ]

# Calculate wealth per capita in each household
wealth2021$Wealth_per_capita <- (wealth2021$VEHW1000VERH/wealth2021$INHAHL)

# Define all possible FUAs
cities <- c("'s-Gravenhage", "'s-Hertogenbosch", "Alkmaar","Almelo", "Alphen aan den Rijn", "Amersfoort", "Amsterdam", "Apeldoorn", "Arnhem", "Assen", "Bergen op Zoom", "Breda", "Deventer", "Ede", "Eindhoven", "Enschede", "Gouda", "Groningen", "Heerlen", "Leeuwarden", "Leiden", "Lelystad", "Maastricht", "Middelburg", "Nijmegen", "Oss", "Roosendaal", "Rotterdam", "Sittard-Geleen", "Soest", "Tilburg", "Utrecht", "Veenendaal", "Venlo", "Zwolle")

# Order values according to their wealth within every FUA
wealth2021 <- wealth2021 %>%
  arrange(FUA, Wealth_per_capita)

# Calculate the wealth percentile rank every household belongs to in their FUA
wealth2021 <- wealth2021 %>%
  group_by(FUA) %>% 
  mutate(Wealth_per_capita_percentile = ntile(Wealth_per_capita, 100))

# Clean rows of missing data 
wealth2021 <- wealth2021 %>%
  filter(VRLVIERKANT100M != "----------")

# Correct coordinates to make sure they represent meters and not kilometers
wealth2021 <- wealth2021 %>%
  mutate(VRLVIERKANT100M = gsub("(E)(\\d+)(N)(\\d+)","\\1\\200\\3\\400", VRLVIERKANT100M))

# Loop over each city (sigma = 500m)
for (city in cities) {
  # Set the variable city for the given city
  City <- city
  
  # Re-start the original database
  df <- wealth2021
  
  # Create the dataframe out of the wealth database  for the FUA we are interested on
  df <- df %>%
    filter(FUA == City)
  
  # Keep only relevant columns
  df <- df %>%
    select(VRLVIERKANT100M, Wealth_per_capita_percentile)
  df$FUA <- NULL
  
  # create a pivot table
  pivot <- df %>%
    group_by(VRLVIERKANT100M, Wealth_per_capita_percentile) %>%
    summarise(Population = n()) %>%
    tidyr::pivot_wider(names_from = Wealth_per_capita_percentile, values_from = Population, values_fill = 0)
  
  # Create an element for only the coordinates of the grid cells
  grid_cells <- pivot %>%
    select(VRLVIERKANT100M)
  
  # Parse the coordinates
  grid_cells <- grid_cells %>%
    mutate(
      x = as.numeric(sub("E", "", str_extract(VRLVIERKANT100M, "E\\d+"))),
      y = as.numeric(sub("N", "", str_extract(VRLVIERKANT100M, "N\\d+"))),
    )
  
  # Delete superflous columns for coordinates
  grid_cells$VRLVIERKANT100M <- NULL
  
  # Create a spatial object for grid cells
  Spatial_grid_cells <- st_as_sf(grid_cells, coords = c("x", "y"), crs = 28992)
  coords <- st_coordinates(Spatial_grid_cells)
  
  # Create SpatialPoints object
  Spatial_grid_cells_sp <- SpatialPoints(coords, proj4string = CRS("+init=epsg:28992"))
  
  # Delete superflous columns for the income percentiles data
  pivot$VRLVIERKANT100M <- NULL
  
  # Order columns in the right order
  col_names <- as.numeric(colnames(pivot))
  
  # Re-order the dataframe
  pivot <- pivot[, order(col_names)]
  
  # Initialize a list to store each database for the income percentile pairwise calculation
  list_of_databases <- list()
  
  # Loop through each combination
  for(i in 1:(ncol(pivot)-1)) {
    # Calculate the cumulative sum of people up to the current percentile
    sumUpToCurrentPerc <- rowSums(pivot[, 1:i], na.rm = TRUE)
    
    # Calculate the sum of people above the current percentile
    sumAboveCurrentPerc <- rowSums(pivot[, (i+1):ncol(pivot)], na.rm = TRUE)
    
    # Create a new dataframe for the current percentile
    newData <- data.frame(sumUpToCurrentPerc = sumUpToCurrentPerc, sumAboveCurrentPerc = sumAboveCurrentPerc)
    
    # Store it in the list
    list_of_databases[[i]] <- newData
  }
  
  # Extract specific databases from it
  indices <- seq(1,99)
  for (i in indices) {
    database_name <- paste0("database_IncPer_", i)
    assign(database_name,list_of_databases[[i]])
  }
  
  # Initialize a list for storing SITI values
  SITI_results <- list()
  
  # Loop through the indices and calculate SITI results
  for (i in indices) {
    database_name <- paste0("database_IncPer_", i)
    SITI_results_name <- paste0("SITI_result", i)
    
    # Calculate SITI
    assign(SITI_results_name, spseg(x = Spatial_grid_cells_sp, data = get(database_name), method = "information", smoothing = "kernel", sigma = 500, useC = FALSE))
    
    # Store the result in the list
    SITI_results[[SITI_results_name]] <- get(SITI_results_name)
    
  }
  
  # Transform the results into something readable
  for (i in indices) {
    SITI_result_name <- paste0("SITI_result", i)
    
    #Transform the SITI result
    assign(SITI_result_name, get(SITI_result_name)@h)
  }
  
  
  # Prepare them for being saved and plotted
  combined_data_list <- list()
  for (i in indices) {
    SITI_result_name <- paste0("SITI_result", i)
    df <- data.frame(Value = get(SITI_result_name), Group = as.character(i))
    combined_data_list[[SITI_result_name]] <- df
  }
  
  # Make them be in a single database
  combined_data <- do.call(rbind, combined_data_list)
  
  # Convert the Group column to a factor with custom levels
  combined_data$Group <- factor(combined_data$Group, levels = as.character(indices))
  
  # Save the data
  write.csv(combined_data, file = paste0("Wealth_per_capita_segregation_500m_2021_", City, ".csv"), row.names = FALSE)
}

# Delete data for enabling further operations
rm(wealth2021)




### Calculate Spatial Information Theory Index for wealth segregation in 2020 ###

# Load the data
wealth2020 <- read.csv("raw_data/database_2020.csv") # only if not already loaded

# Delete missing values of wealth
wealth2020 <- wealth2020[wealth2020$VEHW1000VERH != 99999999999, ]

# Calculate wealth per capita in each household
wealth2020$Wealth_per_capita <- (wealth2020$VEHW1000VERH/wealth2020$INHAHL)

# Define all possible FUAs
cities <- c("'s-Gravenhage", "'s-Hertogenbosch", "Alkmaar","Almelo", "Alphen aan den Rijn", "Amersfoort", "Amsterdam", "Apeldoorn", "Arnhem", "Assen", "Bergen op Zoom", "Breda", "Deventer", "Ede", "Eindhoven", "Enschede", "Gouda", "Groningen", "Heerlen", "Leeuwarden", "Leiden", "Lelystad", "Maastricht", "Middelburg", "Nijmegen", "Oss", "Roosendaal", "Rotterdam", "Sittard-Geleen", "Soest", "Tilburg", "Utrecht", "Veenendaal", "Venlo", "Zwolle")

# Order values according to their wealth within every FUA
wealth2020 <- wealth2020 %>%
  arrange(FUA, Wealth_per_capita)

# Calculate the wealth percentile rank every household belongs to in their FUA
wealth2020 <- wealth2020 %>%
  group_by(FUA) %>% 
  mutate(Wealth_per_capita_percentile = ntile(Wealth_per_capita, 100))

# Clean rows of missing data 
wealth2020 <- wealth2020 %>%
  filter(VRLVIERKANT100M != "----------")

# Correct coordinates to make sure they represent meters and not kilometers
wealth2020 <- wealth2020 %>%
  mutate(VRLVIERKANT100M = gsub("(E)(\\d+)(N)(\\d+)","\\1\\200\\3\\400", VRLVIERKANT100M))

# Loop over each city (sigma = 500m)
for (city in cities) {
  # Set the variable city for the given city
  City <- city
  
  # Re-start the original database
  df <- wealth2020
  
  # Create the dataframe out of the wealth database  for the FUA we are interested on
  df <- df %>%
    filter(FUA == City)
  
  # Keep only relevant columns
  df <- df %>%
    select(VRLVIERKANT100M, Wealth_per_capita_percentile)
  df$FUA <- NULL
  
  # create a pivot table
  pivot <- df %>%
    group_by(VRLVIERKANT100M, Wealth_per_capita_percentile) %>%
    summarise(Population = n()) %>%
    tidyr::pivot_wider(names_from = Wealth_per_capita_percentile, values_from = Population, values_fill = 0)
  
  # Create an element for only the coordinates of the grid cells
  grid_cells <- pivot %>%
    select(VRLVIERKANT100M)
  
  # Parse the coordinates
  grid_cells <- grid_cells %>%
    mutate(
      x = as.numeric(sub("E", "", str_extract(VRLVIERKANT100M, "E\\d+"))),
      y = as.numeric(sub("N", "", str_extract(VRLVIERKANT100M, "N\\d+"))),
    )
  
  # Delete superflous columns for coordinates
  grid_cells$VRLVIERKANT100M <- NULL
  
  # Create a spatial object for grid cells
  Spatial_grid_cells <- st_as_sf(grid_cells, coords = c("x", "y"), crs = 28992)
  coords <- st_coordinates(Spatial_grid_cells)
  
  # Create SpatialPoints object
  Spatial_grid_cells_sp <- SpatialPoints(coords, proj4string = CRS("+init=epsg:28992"))
  
  # Delete superflous columns for the income percentiles data
  pivot$VRLVIERKANT100M <- NULL
  
  # Order columns in the right order
  col_names <- as.numeric(colnames(pivot))
  
  # Re-order the dataframe
  pivot <- pivot[, order(col_names)]
  
  # Initialize a list to store each database for the income percentile pairwise calculation
  list_of_databases <- list()
  
  # Loop through each combination
  for(i in 1:(ncol(pivot)-1)) {
    # Calculate the cumulative sum of people up to the current percentile
    sumUpToCurrentPerc <- rowSums(pivot[, 1:i], na.rm = TRUE)
    
    # Calculate the sum of people above the current percentile
    sumAboveCurrentPerc <- rowSums(pivot[, (i+1):ncol(pivot)], na.rm = TRUE)
    
    # Create a new dataframe for the current percentile
    newData <- data.frame(sumUpToCurrentPerc = sumUpToCurrentPerc, sumAboveCurrentPerc = sumAboveCurrentPerc)
    
    # Store it in the list
    list_of_databases[[i]] <- newData
  }
  
  # Extract specific databases from it
  indices <- seq(1,99)
  for (i in indices) {
    database_name <- paste0("database_IncPer_", i)
    assign(database_name,list_of_databases[[i]])
  }
  
  # Initialize a list for storing SITI values
  SITI_results <- list()
  
  # Loop through the indices and calculate SITI results
  for (i in indices) {
    database_name <- paste0("database_IncPer_", i)
    SITI_results_name <- paste0("SITI_result", i)
    
    # Calculate SITI
    assign(SITI_results_name, spseg(x = Spatial_grid_cells_sp, data = get(database_name), method = "information", smoothing = "kernel", sigma = 500, useC = FALSE))
    
    # Store the result in the list
    SITI_results[[SITI_results_name]] <- get(SITI_results_name)
    
  }
  
  # Transform the results into something readable
  for (i in indices) {
    SITI_result_name <- paste0("SITI_result", i)
    
    #Transform the SITI result
    assign(SITI_result_name, get(SITI_result_name)@h)
  }
  
  
  # Prepare them for being saved and plotted
  combined_data_list <- list()
  for (i in indices) {
    SITI_result_name <- paste0("SITI_result", i)
    df <- data.frame(Value = get(SITI_result_name), Group = as.character(i))
    combined_data_list[[SITI_result_name]] <- df
  }
  
  # Make them be in a single database
  combined_data <- do.call(rbind, combined_data_list)
  
  # Convert the Group column to a factor with custom levels
  combined_data$Group <- factor(combined_data$Group, levels = as.character(indices))
  
  # Save the data
  write.csv(combined_data, file = paste0("Wealth_per_capita_segregation_500m_2020_", City, ".csv"), row.names = FALSE)
}

# Delete data for enabling further operations
rm(wealth2020)


### Calculate Spatial Information Theory Index for wealth segregation in 2019 ###

# Load the data
wealth2019 <- read.csv("raw_data/database_2019.csv") # only if not already loaded

# Delete missing values of wealth
wealth2019 <- wealth2019[wealth2019$VEHW1000VERH != 99999999999, ]

# Calculate wealth per capita in each household
wealth2019$Wealth_per_capita <- (wealth2019$VEHW1000VERH/wealth2019$INHAHL)

# Define all possible FUAs
cities <- c("'s-Gravenhage", "'s-Hertogenbosch", "Alkmaar","Almelo", "Alphen aan den Rijn", "Amersfoort", "Amsterdam", "Apeldoorn", "Arnhem", "Assen", "Bergen op Zoom", "Breda", "Deventer", "Ede", "Eindhoven", "Enschede", "Gouda", "Groningen", "Heerlen", "Leeuwarden", "Leiden", "Lelystad", "Maastricht", "Middelburg", "Nijmegen", "Oss", "Roosendaal", "Rotterdam", "Sittard-Geleen", "Soest", "Tilburg", "Utrecht", "Veenendaal", "Venlo", "Zwolle")

# Order values according to their wealth within every FUA
wealth2019 <- wealth2019 %>%
  arrange(FUA, Wealth_per_capita)

# Calculate the wealth percentile rank every household belongs to in their FUA
wealth2019 <- wealth2019 %>%
  group_by(FUA) %>% 
  mutate(Wealth_per_capita_percentile = ntile(Wealth_per_capita, 100))

# Clean rows of missing data 
wealth2019 <- wealth2019 %>%
  filter(VRLVIERKANT100M != "----------")

# Correct coordinates to make sure they represent meters and not kilometers
wealth2019 <- wealth2019 %>%
  mutate(VRLVIERKANT100M = gsub("(E)(\\d+)(N)(\\d+)","\\1\\200\\3\\400", VRLVIERKANT100M))

# Loop over each city (sigma = 500m)
for (city in cities) {
  # Set the variable city for the given city
  City <- city
  
  # Re-start the original database
  df <- wealth2019
  
  # Create the dataframe out of the wealth database  for the FUA we are interested on
  df <- df %>%
    filter(FUA == City)
  
  # Keep only relevant columns
  df <- df %>%
    select(VRLVIERKANT100M, Wealth_per_capita_percentile)
  df$FUA <- NULL
  
  # create a pivot table
  pivot <- df %>%
    group_by(VRLVIERKANT100M, Wealth_per_capita_percentile) %>%
    summarise(Population = n()) %>%
    tidyr::pivot_wider(names_from = Wealth_per_capita_percentile, values_from = Population, values_fill = 0)
  
  # Create an element for only the coordinates of the grid cells
  grid_cells <- pivot %>%
    select(VRLVIERKANT100M)
  
  # Parse the coordinates
  grid_cells <- grid_cells %>%
    mutate(
      x = as.numeric(sub("E", "", str_extract(VRLVIERKANT100M, "E\\d+"))),
      y = as.numeric(sub("N", "", str_extract(VRLVIERKANT100M, "N\\d+"))),
    )
  
  # Delete superflous columns for coordinates
  grid_cells$VRLVIERKANT100M <- NULL
  
  # Create a spatial object for grid cells
  Spatial_grid_cells <- st_as_sf(grid_cells, coords = c("x", "y"), crs = 28992)
  coords <- st_coordinates(Spatial_grid_cells)
  
  # Create SpatialPoints object
  Spatial_grid_cells_sp <- SpatialPoints(coords, proj4string = CRS("+init=epsg:28992"))
  
  # Delete superflous columns for the income percentiles data
  pivot$VRLVIERKANT100M <- NULL
  
  # Order columns in the right order
  col_names <- as.numeric(colnames(pivot))
  
  # Re-order the dataframe
  pivot <- pivot[, order(col_names)]
  
  # Initialize a list to store each database for the income percentile pairwise calculation
  list_of_databases <- list()
  
  # Loop through each combination
  for(i in 1:(ncol(pivot)-1)) {
    # Calculate the cumulative sum of people up to the current percentile
    sumUpToCurrentPerc <- rowSums(pivot[, 1:i], na.rm = TRUE)
    
    # Calculate the sum of people above the current percentile
    sumAboveCurrentPerc <- rowSums(pivot[, (i+1):ncol(pivot)], na.rm = TRUE)
    
    # Create a new dataframe for the current percentile
    newData <- data.frame(sumUpToCurrentPerc = sumUpToCurrentPerc, sumAboveCurrentPerc = sumAboveCurrentPerc)
    
    # Store it in the list
    list_of_databases[[i]] <- newData
  }
  
  # Extract specific databases from it
  indices <- seq(1,99)
  for (i in indices) {
    database_name <- paste0("database_IncPer_", i)
    assign(database_name,list_of_databases[[i]])
  }
  
  # Initialize a list for storing SITI values
  SITI_results <- list()
  
  # Loop through the indices and calculate SITI results
  for (i in indices) {
    database_name <- paste0("database_IncPer_", i)
    SITI_results_name <- paste0("SITI_result", i)
    
    # Calculate SITI
    assign(SITI_results_name, spseg(x = Spatial_grid_cells_sp, data = get(database_name), method = "information", smoothing = "kernel", sigma = 500, useC = FALSE))
    
    # Store the result in the list
    SITI_results[[SITI_results_name]] <- get(SITI_results_name)
    
  }
  
  # Transform the results into something readable
  for (i in indices) {
    SITI_result_name <- paste0("SITI_result", i)
    
    #Transform the SITI result
    assign(SITI_result_name, get(SITI_result_name)@h)
  }
  
  
  # Prepare them for being saved and plotted
  combined_data_list <- list()
  for (i in indices) {
    SITI_result_name <- paste0("SITI_result", i)
    df <- data.frame(Value = get(SITI_result_name), Group = as.character(i))
    combined_data_list[[SITI_result_name]] <- df
  }
  
  # Make them be in a single database
  combined_data <- do.call(rbind, combined_data_list)
  
  # Convert the Group column to a factor with custom levels
  combined_data$Group <- factor(combined_data$Group, levels = as.character(indices))
  
  # Save the data
  write.csv(combined_data, file = paste0("Wealth_per_capita_segregation_500m_2019_", City, ".csv"), row.names = FALSE)
}

# Delete data for enabling further operations
rm(wealth2019)





### Calculate Spatial Information Theory Index for wealth segregation in 2018 ###

# Load the data
wealth2018 <- read.csv("raw_data/database_2018.csv") # only if not already loaded

# Delete missing values of wealth
wealth2018 <- wealth2018[wealth2018$VEHW1000VERH != 99999999999, ]

# Calculate wealth per capita in each household
wealth2018$Wealth_per_capita <- (wealth2018$VEHW1000VERH/wealth2018$INHAHL)

# Define all possible FUAs
cities <- c("'s-Gravenhage", "'s-Hertogenbosch", "Alkmaar","Almelo", "Alphen aan den Rijn", "Amersfoort", "Amsterdam", "Apeldoorn", "Arnhem", "Assen", "Bergen op Zoom", "Breda", "Deventer", "Ede", "Eindhoven", "Enschede", "Gouda", "Groningen", "Heerlen", "Leeuwarden", "Leiden", "Lelystad", "Maastricht", "Middelburg", "Nijmegen", "Oss", "Roosendaal", "Rotterdam", "Sittard-Geleen", "Soest", "Tilburg", "Utrecht", "Veenendaal", "Venlo", "Zwolle")

# Order values according to their wealth within every FUA
wealth2018 <- wealth2018 %>%
  arrange(FUA, Wealth_per_capita)

# Calculate the wealth percentile rank every household belongs to in their FUA
wealth2018 <- wealth2018 %>%
  group_by(FUA) %>% 
  mutate(Wealth_per_capita_percentile = ntile(Wealth_per_capita, 100))

# Clean rows of missing data 
wealth2018 <- wealth2018 %>%
  filter(VRLVIERKANT100M != "----------")

# Correct coordinates to make sure they represent meters and not kilometers
wealth2018 <- wealth2018 %>%
  mutate(VRLVIERKANT100M = gsub("(E)(\\d+)(N)(\\d+)","\\1\\200\\3\\400", VRLVIERKANT100M))

# Loop over each city (sigma = 500m)
for (city in cities) {
  # Set the variable city for the given city
  City <- city
  
  # Re-start the original database
  df <- wealth2018
  
  # Create the dataframe out of the wealth database  for the FUA we are interested on
  df <- df %>%
    filter(FUA == City)
  
  # Keep only relevant columns
  df <- df %>%
    select(VRLVIERKANT100M, Wealth_per_capita_percentile)
  df$FUA <- NULL
  
  # create a pivot table
  pivot <- df %>%
    group_by(VRLVIERKANT100M, Wealth_per_capita_percentile) %>%
    summarise(Population = n()) %>%
    tidyr::pivot_wider(names_from = Wealth_per_capita_percentile, values_from = Population, values_fill = 0)
  
  # Create an element for only the coordinates of the grid cells
  grid_cells <- pivot %>%
    select(VRLVIERKANT100M)
  
  # Parse the coordinates
  grid_cells <- grid_cells %>%
    mutate(
      x = as.numeric(sub("E", "", str_extract(VRLVIERKANT100M, "E\\d+"))),
      y = as.numeric(sub("N", "", str_extract(VRLVIERKANT100M, "N\\d+"))),
    )
  
  # Delete superflous columns for coordinates
  grid_cells$VRLVIERKANT100M <- NULL
  
  # Create a spatial object for grid cells
  Spatial_grid_cells <- st_as_sf(grid_cells, coords = c("x", "y"), crs = 28992)
  coords <- st_coordinates(Spatial_grid_cells)
  
  # Create SpatialPoints object
  Spatial_grid_cells_sp <- SpatialPoints(coords, proj4string = CRS("+init=epsg:28992"))
  
  # Delete superflous columns for the income percentiles data
  pivot$VRLVIERKANT100M <- NULL
  
  # Order columns in the right order
  col_names <- as.numeric(colnames(pivot))
  
  # Re-order the dataframe
  pivot <- pivot[, order(col_names)]
  
  # Initialize a list to store each database for the income percentile pairwise calculation
  list_of_databases <- list()
  
  # Loop through each combination
  for(i in 1:(ncol(pivot)-1)) {
    # Calculate the cumulative sum of people up to the current percentile
    sumUpToCurrentPerc <- rowSums(pivot[, 1:i], na.rm = TRUE)
    
    # Calculate the sum of people above the current percentile
    sumAboveCurrentPerc <- rowSums(pivot[, (i+1):ncol(pivot)], na.rm = TRUE)
    
    # Create a new dataframe for the current percentile
    newData <- data.frame(sumUpToCurrentPerc = sumUpToCurrentPerc, sumAboveCurrentPerc = sumAboveCurrentPerc)
    
    # Store it in the list
    list_of_databases[[i]] <- newData
  }
  
  # Extract specific databases from it
  indices <- seq(1,99)
  for (i in indices) {
    database_name <- paste0("database_IncPer_", i)
    assign(database_name,list_of_databases[[i]])
  }
  
  # Initialize a list for storing SITI values
  SITI_results <- list()
  
  # Loop through the indices and calculate SITI results
  for (i in indices) {
    database_name <- paste0("database_IncPer_", i)
    SITI_results_name <- paste0("SITI_result", i)
    
    # Calculate SITI
    assign(SITI_results_name, spseg(x = Spatial_grid_cells_sp, data = get(database_name), method = "information", smoothing = "kernel", sigma = 500, useC = FALSE))
    
    # Store the result in the list
    SITI_results[[SITI_results_name]] <- get(SITI_results_name)
    
  }
  
  # Transform the results into something readable
  for (i in indices) {
    SITI_result_name <- paste0("SITI_result", i)
    
    #Transform the SITI result
    assign(SITI_result_name, get(SITI_result_name)@h)
  }
  
  
  # Prepare them for being saved and plotted
  combined_data_list <- list()
  for (i in indices) {
    SITI_result_name <- paste0("SITI_result", i)
    df <- data.frame(Value = get(SITI_result_name), Group = as.character(i))
    combined_data_list[[SITI_result_name]] <- df
  }
  
  # Make them be in a single database
  combined_data <- do.call(rbind, combined_data_list)
  
  # Convert the Group column to a factor with custom levels
  combined_data$Group <- factor(combined_data$Group, levels = as.character(indices))
  
  # Save the data
  write.csv(combined_data, file = paste0("Wealth_per_capita_segregation_500m_2018_", City, ".csv"), row.names = FALSE)
}

# Delete data for enabling further operations
rm(wealth2018)




### Calculate Spatial Information Theory Index for wealth segregation in 2017 ###

# Load the data
wealth2017 <- read.csv("raw_data/database_2017.csv") # only if not already loaded

# Delete missing values of wealth
wealth2017 <- wealth2017[wealth2017$VEHW1000VERH != 99999999999, ]

# Calculate wealth per capita in each household
wealth2017$Wealth_per_capita <- (wealth2017$VEHW1000VERH/wealth2017$INHAHL)

# Define all possible FUAs
cities <- c("'s-Gravenhage", "'s-Hertogenbosch", "Alkmaar","Almelo", "Alphen aan den Rijn", "Amersfoort", "Amsterdam", "Apeldoorn", "Arnhem", "Assen", "Bergen op Zoom", "Breda", "Deventer", "Ede", "Eindhoven", "Enschede", "Gouda", "Groningen", "Heerlen", "Leeuwarden", "Leiden", "Lelystad", "Maastricht", "Middelburg", "Nijmegen", "Oss", "Roosendaal", "Rotterdam", "Sittard-Geleen", "Soest", "Tilburg", "Utrecht", "Veenendaal", "Venlo", "Zwolle")

# Order values according to their wealth within every FUA
wealth2017 <- wealth2017 %>%
  arrange(FUA, Wealth_per_capita)

# Calculate the wealth percentile rank every household belongs to in their FUA
wealth2017 <- wealth2017 %>%
  group_by(FUA) %>% 
  mutate(Wealth_per_capita_percentile = ntile(Wealth_per_capita, 100))

# Clean rows of missing data 
wealth2017 <- wealth2017 %>%
  filter(VRLVIERKANT100M != "----------")

# Correct coordinates to make sure they represent meters and not kilometers
wealth2017 <- wealth2017 %>%
  mutate(VRLVIERKANT100M = gsub("(E)(\\d+)(N)(\\d+)","\\1\\200\\3\\400", VRLVIERKANT100M))

# Loop over each city (sigma = 500m)
for (city in cities) {
  # Set the variable city for the given city
  City <- city
  
  # Re-start the original database
  df <- wealth2017
  
  # Create the dataframe out of the wealth database  for the FUA we are interested on
  df <- df %>%
    filter(FUA == City)
  
  # Keep only relevant columns
  df <- df %>%
    select(VRLVIERKANT100M, Wealth_per_capita_percentile)
  df$FUA <- NULL
  
  # create a pivot table
  pivot <- df %>%
    group_by(VRLVIERKANT100M, Wealth_per_capita_percentile) %>%
    summarise(Population = n()) %>%
    tidyr::pivot_wider(names_from = Wealth_per_capita_percentile, values_from = Population, values_fill = 0)
  
  # Create an element for only the coordinates of the grid cells
  grid_cells <- pivot %>%
    select(VRLVIERKANT100M)
  
  # Parse the coordinates
  grid_cells <- grid_cells %>%
    mutate(
      x = as.numeric(sub("E", "", str_extract(VRLVIERKANT100M, "E\\d+"))),
      y = as.numeric(sub("N", "", str_extract(VRLVIERKANT100M, "N\\d+"))),
    )
  
  # Delete superflous columns for coordinates
  grid_cells$VRLVIERKANT100M <- NULL
  
  # Create a spatial object for grid cells
  Spatial_grid_cells <- st_as_sf(grid_cells, coords = c("x", "y"), crs = 28992)
  coords <- st_coordinates(Spatial_grid_cells)
  
  # Create SpatialPoints object
  Spatial_grid_cells_sp <- SpatialPoints(coords, proj4string = CRS("+init=epsg:28992"))
  
  # Delete superflous columns for the income percentiles data
  pivot$VRLVIERKANT100M <- NULL
  
  # Order columns in the right order
  col_names <- as.numeric(colnames(pivot))
  
  # Re-order the dataframe
  pivot <- pivot[, order(col_names)]
  
  # Initialize a list to store each database for the income percentile pairwise calculation
  list_of_databases <- list()
  
  # Loop through each combination
  for(i in 1:(ncol(pivot)-1)) {
    # Calculate the cumulative sum of people up to the current percentile
    sumUpToCurrentPerc <- rowSums(pivot[, 1:i], na.rm = TRUE)
    
    # Calculate the sum of people above the current percentile
    sumAboveCurrentPerc <- rowSums(pivot[, (i+1):ncol(pivot)], na.rm = TRUE)
    
    # Create a new dataframe for the current percentile
    newData <- data.frame(sumUpToCurrentPerc = sumUpToCurrentPerc, sumAboveCurrentPerc = sumAboveCurrentPerc)
    
    # Store it in the list
    list_of_databases[[i]] <- newData
  }
  
  # Extract specific databases from it
  indices <- seq(1,99)
  for (i in indices) {
    database_name <- paste0("database_IncPer_", i)
    assign(database_name,list_of_databases[[i]])
  }
  
  # Initialize a list for storing SITI values
  SITI_results <- list()
  
  # Loop through the indices and calculate SITI results
  for (i in indices) {
    database_name <- paste0("database_IncPer_", i)
    SITI_results_name <- paste0("SITI_result", i)
    
    # Calculate SITI
    assign(SITI_results_name, spseg(x = Spatial_grid_cells_sp, data = get(database_name), method = "information", smoothing = "kernel", sigma = 500, useC = FALSE))
    
    # Store the result in the list
    SITI_results[[SITI_results_name]] <- get(SITI_results_name)
    
  }
  
  # Transform the results into something readable
  for (i in indices) {
    SITI_result_name <- paste0("SITI_result", i)
    
    #Transform the SITI result
    assign(SITI_result_name, get(SITI_result_name)@h)
  }
  
  
  # Prepare them for being saved and plotted
  combined_data_list <- list()
  for (i in indices) {
    SITI_result_name <- paste0("SITI_result", i)
    df <- data.frame(Value = get(SITI_result_name), Group = as.character(i))
    combined_data_list[[SITI_result_name]] <- df
  }
  
  # Make them be in a single database
  combined_data <- do.call(rbind, combined_data_list)
  
  # Convert the Group column to a factor with custom levels
  combined_data$Group <- factor(combined_data$Group, levels = as.character(indices))
  
  # Save the data
  write.csv(combined_data, file = paste0("Wealth_per_capita_segregation_500m_2017_", City, ".csv"), row.names = FALSE)
}

# Remove the 2017 data from the workspace before the next year is loaded
rm(wealth2017)




### Calculate Spatial Information Theory Index for wealth segregation in 2016 ###

# Load the data
wealth2016 <- read.csv("raw_data/database_2016.csv") # only if not already loaded

# Drop households whose wealth equals the missing-value code and compute the
# wealth per household member. filter() also drops rows where the comparison
# is NA; base `[` indexing would have turned those rows into all-NA rows.
# NOTE(review): INHAHL == 0 would give Inf/NaN here - confirm it cannot occur.
wealth2016 <- wealth2016 %>%
  filter(VEHW1000VERH != 99999999999) %>%
  mutate(Wealth_per_capita = VEHW1000VERH / INHAHL)

# Define all possible FUAs
cities <- c(
  "'s-Gravenhage", "'s-Hertogenbosch", "Alkmaar", "Almelo",
  "Alphen aan den Rijn", "Amersfoort", "Amsterdam", "Apeldoorn", "Arnhem",
  "Assen", "Bergen op Zoom", "Breda", "Deventer", "Ede", "Eindhoven",
  "Enschede", "Gouda", "Groningen", "Heerlen", "Leeuwarden", "Leiden",
  "Lelystad", "Maastricht", "Middelburg", "Nijmegen", "Oss", "Roosendaal",
  "Rotterdam", "Sittard-Geleen", "Soest", "Tilburg", "Utrecht", "Veenendaal",
  "Venlo", "Zwolle"
)

# Sort households by wealth within every FUA and assign each one to one of 100
# equally sized rank groups of its FUA. ungroup() afterwards so that the
# grouping by FUA does not leak into later steps.
wealth2016 <- wealth2016 %>%
  arrange(FUA, Wealth_per_capita) %>%
  group_by(FUA) %>%
  mutate(Wealth_per_capita_percentile = ntile(Wealth_per_capita, 100)) %>%
  ungroup()

# Remove households without a grid cell, then append "00" to the E and N
# numbers of the grid-cell code (i.e. multiply both by 100).
# NOTE(review): presumably this converts units of 100 m into metres - confirm.
wealth2016 <- wealth2016 %>%
  filter(VRLVIERKANT100M != "----------") %>%
  mutate(VRLVIERKANT100M = gsub("(E)(\\d+)(N)(\\d+)", "\\1\\200\\3\\400", VRLVIERKANT100M))

# Loop over each city (sigma = 500m)
# For every FUA: count households per grid cell and wealth percentile, split
# the population at each percentile threshold 1..99 into "up to" and "above",
# compute the spatial information theory index for every split and write the
# 99 values to one CSV file per city.
for (city in cities) {
  # Keep the current city name under `City`, as in the original script
  City <- city

  # Households of this FUA, reduced to grid cell and percentile rank.
  # ungroup() makes select() return exactly these two columns even when the
  # input is still grouped by FUA. Rows without a percentile rank are dropped:
  # they used to end up in an "NA" column that was sorted last and was
  # therefore counted in the "above" group of every split.
  df <- wealth2016 %>%
    ungroup() %>%
    filter(FUA == City, !is.na(Wealth_per_capita_percentile)) %>%
    select(VRLVIERKANT100M, Wealth_per_capita_percentile)

  # Nothing to compute for a city without households; skip it instead of
  # aborting the whole run
  if (nrow(df) == 0) {
    warning("No households found for FUA '", City, "' in 2016; skipping it.",
            call. = FALSE)
    next
  }

  # One row per grid cell, one column per percentile, values = household counts
  pivot <- df %>%
    count(VRLVIERKANT100M, Wealth_per_capita_percentile, name = "Population") %>%
    tidyr::pivot_wider(names_from = Wealth_per_capita_percentile, values_from = Population, values_fill = 0)

  # Parse the E/N numbers of the grid-cell codes into a coordinate matrix
  cell_ids <- pivot$VRLVIERKANT100M
  coords <- cbind(
    X = as.numeric(str_extract(cell_ids, "(?<=E)\\d+")),
    Y = as.numeric(str_extract(cell_ids, "(?<=N)\\d+"))
  )

  # Create SpatialPoints object
  Spatial_grid_cells_sp <- SpatialPoints(coords, proj4string = CRS("+init=epsg:28992"))

  # Make sure all 100 percentile columns exist and are in ascending order, so
  # that column i really is percentile i. A percentile can be absent when none
  # of its households has a valid grid cell in this city.
  percentile_cols <- as.character(1:100)
  for (col in setdiff(percentile_cols, colnames(pivot))) {
    pivot[[col]] <- 0L
  }
  counts <- as.matrix(pivot[, percentile_cols])

  # Households per grid cell over all percentiles
  cell_totals <- rowSums(counts)

  # Compute the index for every threshold between two percentiles
  indices <- seq(1, 99)
  SITI_values <- numeric(length(indices))
  for (i in indices) {
    # Households up to and including percentile i, and those above it
    sumUpToCurrentPerc <- rowSums(counts[, seq_len(i), drop = FALSE])
    group_counts <- data.frame(
      sumUpToCurrentPerc = sumUpToCurrentPerc,
      sumAboveCurrentPerc = cell_totals - sumUpToCurrentPerc
    )

    # Calculate SITI and keep only the `h` slot of the result
    SITI_values[i] <- spseg(x = Spatial_grid_cells_sp, data = group_counts, method = "information", smoothing = "kernel", sigma = 500, useC = FALSE)@h
  }

  # One row per threshold; Group is a factor ordered 1..99
  combined_data <- data.frame(
    Value = SITI_values,
    Group = factor(as.character(indices), levels = as.character(indices))
  )

  # Save the data
  write.csv(combined_data, file = paste0("Wealth_per_capita_segregation_500m_2016_", City, ".csv"), row.names = FALSE)
}

# Remove the 2016 data from the workspace before the next year is loaded
rm(wealth2016)


### Calculate Spatial Information Theory Index for wealth segregation in 2015 ###

# Load the data
wealth2015 <- read.csv("raw_data/database_2015.csv") # only if not already loaded

# Drop households whose wealth equals the missing-value code and compute the
# wealth per household member. filter() also drops rows where the comparison
# is NA; base `[` indexing would have turned those rows into all-NA rows.
# NOTE(review): INHAHL == 0 would give Inf/NaN here - confirm it cannot occur.
wealth2015 <- wealth2015 %>%
  filter(VEHW1000VERH != 99999999999) %>%
  mutate(Wealth_per_capita = VEHW1000VERH / INHAHL)

# Define all possible FUAs
cities <- c(
  "'s-Gravenhage", "'s-Hertogenbosch", "Alkmaar", "Almelo",
  "Alphen aan den Rijn", "Amersfoort", "Amsterdam", "Apeldoorn", "Arnhem",
  "Assen", "Bergen op Zoom", "Breda", "Deventer", "Ede", "Eindhoven",
  "Enschede", "Gouda", "Groningen", "Heerlen", "Leeuwarden", "Leiden",
  "Lelystad", "Maastricht", "Middelburg", "Nijmegen", "Oss", "Roosendaal",
  "Rotterdam", "Sittard-Geleen", "Soest", "Tilburg", "Utrecht", "Veenendaal",
  "Venlo", "Zwolle"
)

# Sort households by wealth within every FUA and assign each one to one of 100
# equally sized rank groups of its FUA. ungroup() afterwards so that the
# grouping by FUA does not leak into later steps.
wealth2015 <- wealth2015 %>%
  arrange(FUA, Wealth_per_capita) %>%
  group_by(FUA) %>%
  mutate(Wealth_per_capita_percentile = ntile(Wealth_per_capita, 100)) %>%
  ungroup()

# Remove households without a grid cell, then append "00" to the E and N
# numbers of the grid-cell code (i.e. multiply both by 100).
# NOTE(review): presumably this converts units of 100 m into metres - confirm.
wealth2015 <- wealth2015 %>%
  filter(VRLVIERKANT100M != "----------") %>%
  mutate(VRLVIERKANT100M = gsub("(E)(\\d+)(N)(\\d+)", "\\1\\200\\3\\400", VRLVIERKANT100M))

# Loop over each city (sigma = 500m)
# For every FUA: count households per grid cell and wealth percentile, split
# the population at each percentile threshold 1..99 into "up to" and "above",
# compute the spatial information theory index for every split and write the
# 99 values to one CSV file per city.
for (city in cities) {
  # Keep the current city name under `City`, as in the original script
  City <- city

  # Households of this FUA, reduced to grid cell and percentile rank.
  # ungroup() makes select() return exactly these two columns even when the
  # input is still grouped by FUA. Rows without a percentile rank are dropped:
  # they used to end up in an "NA" column that was sorted last and was
  # therefore counted in the "above" group of every split.
  df <- wealth2015 %>%
    ungroup() %>%
    filter(FUA == City, !is.na(Wealth_per_capita_percentile)) %>%
    select(VRLVIERKANT100M, Wealth_per_capita_percentile)

  # Nothing to compute for a city without households; skip it instead of
  # aborting the whole run
  if (nrow(df) == 0) {
    warning("No households found for FUA '", City, "' in 2015; skipping it.",
            call. = FALSE)
    next
  }

  # One row per grid cell, one column per percentile, values = household counts
  pivot <- df %>%
    count(VRLVIERKANT100M, Wealth_per_capita_percentile, name = "Population") %>%
    tidyr::pivot_wider(names_from = Wealth_per_capita_percentile, values_from = Population, values_fill = 0)

  # Parse the E/N numbers of the grid-cell codes into a coordinate matrix
  cell_ids <- pivot$VRLVIERKANT100M
  coords <- cbind(
    X = as.numeric(str_extract(cell_ids, "(?<=E)\\d+")),
    Y = as.numeric(str_extract(cell_ids, "(?<=N)\\d+"))
  )

  # Create SpatialPoints object
  Spatial_grid_cells_sp <- SpatialPoints(coords, proj4string = CRS("+init=epsg:28992"))

  # Make sure all 100 percentile columns exist and are in ascending order, so
  # that column i really is percentile i. A percentile can be absent when none
  # of its households has a valid grid cell in this city.
  percentile_cols <- as.character(1:100)
  for (col in setdiff(percentile_cols, colnames(pivot))) {
    pivot[[col]] <- 0L
  }
  counts <- as.matrix(pivot[, percentile_cols])

  # Households per grid cell over all percentiles
  cell_totals <- rowSums(counts)

  # Compute the index for every threshold between two percentiles
  indices <- seq(1, 99)
  SITI_values <- numeric(length(indices))
  for (i in indices) {
    # Households up to and including percentile i, and those above it
    sumUpToCurrentPerc <- rowSums(counts[, seq_len(i), drop = FALSE])
    group_counts <- data.frame(
      sumUpToCurrentPerc = sumUpToCurrentPerc,
      sumAboveCurrentPerc = cell_totals - sumUpToCurrentPerc
    )

    # Calculate SITI and keep only the `h` slot of the result
    SITI_values[i] <- spseg(x = Spatial_grid_cells_sp, data = group_counts, method = "information", smoothing = "kernel", sigma = 500, useC = FALSE)@h
  }

  # One row per threshold; Group is a factor ordered 1..99
  combined_data <- data.frame(
    Value = SITI_values,
    Group = factor(as.character(indices), levels = as.character(indices))
  )

  # Save the data
  write.csv(combined_data, file = paste0("Wealth_per_capita_segregation_500m_2015_", City, ".csv"), row.names = FALSE)
}

# Remove the 2015 data from the workspace before the next year is loaded
rm(wealth2015)



### Calculate Spatial Information Theory Index for wealth segregation in 2014 ###

# Load the data
wealth2014 <- read.csv("raw_data/database_2014.csv") # only if not already loaded

# Drop households whose wealth equals the missing-value code and compute the
# wealth per household member. filter() also drops rows where the comparison
# is NA; base `[` indexing would have turned those rows into all-NA rows.
# NOTE(review): INHAHL == 0 would give Inf/NaN here - confirm it cannot occur.
wealth2014 <- wealth2014 %>%
  filter(VEHW1000VERH != 99999999999) %>%
  mutate(Wealth_per_capita = VEHW1000VERH / INHAHL)

# Define all possible FUAs
cities <- c(
  "'s-Gravenhage", "'s-Hertogenbosch", "Alkmaar", "Almelo",
  "Alphen aan den Rijn", "Amersfoort", "Amsterdam", "Apeldoorn", "Arnhem",
  "Assen", "Bergen op Zoom", "Breda", "Deventer", "Ede", "Eindhoven",
  "Enschede", "Gouda", "Groningen", "Heerlen", "Leeuwarden", "Leiden",
  "Lelystad", "Maastricht", "Middelburg", "Nijmegen", "Oss", "Roosendaal",
  "Rotterdam", "Sittard-Geleen", "Soest", "Tilburg", "Utrecht", "Veenendaal",
  "Venlo", "Zwolle"
)

# Sort households by wealth within every FUA and assign each one to one of 100
# equally sized rank groups of its FUA. ungroup() afterwards so that the
# grouping by FUA does not leak into later steps.
wealth2014 <- wealth2014 %>%
  arrange(FUA, Wealth_per_capita) %>%
  group_by(FUA) %>%
  mutate(Wealth_per_capita_percentile = ntile(Wealth_per_capita, 100)) %>%
  ungroup()

# Remove households without a grid cell, then append "00" to the E and N
# numbers of the grid-cell code (i.e. multiply both by 100).
# NOTE(review): presumably this converts units of 100 m into metres - confirm.
wealth2014 <- wealth2014 %>%
  filter(VRLVIERKANT100M != "----------") %>%
  mutate(VRLVIERKANT100M = gsub("(E)(\\d+)(N)(\\d+)", "\\1\\200\\3\\400", VRLVIERKANT100M))

# Loop over each city (sigma = 500m)
# For every FUA: count households per grid cell and wealth percentile, split
# the population at each percentile threshold 1..99 into "up to" and "above",
# compute the spatial information theory index for every split and write the
# 99 values to one CSV file per city.
for (city in cities) {
  # Keep the current city name under `City`, as in the original script
  City <- city

  # Households of this FUA, reduced to grid cell and percentile rank.
  # ungroup() makes select() return exactly these two columns even when the
  # input is still grouped by FUA. Rows without a percentile rank are dropped:
  # they used to end up in an "NA" column that was sorted last and was
  # therefore counted in the "above" group of every split.
  df <- wealth2014 %>%
    ungroup() %>%
    filter(FUA == City, !is.na(Wealth_per_capita_percentile)) %>%
    select(VRLVIERKANT100M, Wealth_per_capita_percentile)

  # Nothing to compute for a city without households; skip it instead of
  # aborting the whole run
  if (nrow(df) == 0) {
    warning("No households found for FUA '", City, "' in 2014; skipping it.",
            call. = FALSE)
    next
  }

  # One row per grid cell, one column per percentile, values = household counts
  pivot <- df %>%
    count(VRLVIERKANT100M, Wealth_per_capita_percentile, name = "Population") %>%
    tidyr::pivot_wider(names_from = Wealth_per_capita_percentile, values_from = Population, values_fill = 0)

  # Parse the E/N numbers of the grid-cell codes into a coordinate matrix
  cell_ids <- pivot$VRLVIERKANT100M
  coords <- cbind(
    X = as.numeric(str_extract(cell_ids, "(?<=E)\\d+")),
    Y = as.numeric(str_extract(cell_ids, "(?<=N)\\d+"))
  )

  # Create SpatialPoints object
  Spatial_grid_cells_sp <- SpatialPoints(coords, proj4string = CRS("+init=epsg:28992"))

  # Make sure all 100 percentile columns exist and are in ascending order, so
  # that column i really is percentile i. A percentile can be absent when none
  # of its households has a valid grid cell in this city.
  percentile_cols <- as.character(1:100)
  for (col in setdiff(percentile_cols, colnames(pivot))) {
    pivot[[col]] <- 0L
  }
  counts <- as.matrix(pivot[, percentile_cols])

  # Households per grid cell over all percentiles
  cell_totals <- rowSums(counts)

  # Compute the index for every threshold between two percentiles
  indices <- seq(1, 99)
  SITI_values <- numeric(length(indices))
  for (i in indices) {
    # Households up to and including percentile i, and those above it
    sumUpToCurrentPerc <- rowSums(counts[, seq_len(i), drop = FALSE])
    group_counts <- data.frame(
      sumUpToCurrentPerc = sumUpToCurrentPerc,
      sumAboveCurrentPerc = cell_totals - sumUpToCurrentPerc
    )

    # Calculate SITI and keep only the `h` slot of the result
    SITI_values[i] <- spseg(x = Spatial_grid_cells_sp, data = group_counts, method = "information", smoothing = "kernel", sigma = 500, useC = FALSE)@h
  }

  # One row per threshold; Group is a factor ordered 1..99
  combined_data <- data.frame(
    Value = SITI_values,
    Group = factor(as.character(indices), levels = as.character(indices))
  )

  # Save the data
  write.csv(combined_data, file = paste0("Wealth_per_capita_segregation_500m_2014_", City, ".csv"), row.names = FALSE)
}

# Remove the 2014 data from the workspace before the next year is loaded
rm(wealth2014)




### Calculate Spatial Information Theory Index for wealth segregation in 2013 ###

# Load the data
wealth2013 <- read.csv("raw_data/database_2013.csv") # only if not already loaded

# Drop households whose wealth equals the missing-value code and compute the
# wealth per household member. filter() also drops rows where the comparison
# is NA; base `[` indexing would have turned those rows into all-NA rows.
# NOTE(review): INHAHL == 0 would give Inf/NaN here - confirm it cannot occur.
wealth2013 <- wealth2013 %>%
  filter(VEHW1000VERH != 99999999999) %>%
  mutate(Wealth_per_capita = VEHW1000VERH / INHAHL)

# Define all possible FUAs
cities <- c(
  "'s-Gravenhage", "'s-Hertogenbosch", "Alkmaar", "Almelo",
  "Alphen aan den Rijn", "Amersfoort", "Amsterdam", "Apeldoorn", "Arnhem",
  "Assen", "Bergen op Zoom", "Breda", "Deventer", "Ede", "Eindhoven",
  "Enschede", "Gouda", "Groningen", "Heerlen", "Leeuwarden", "Leiden",
  "Lelystad", "Maastricht", "Middelburg", "Nijmegen", "Oss", "Roosendaal",
  "Rotterdam", "Sittard-Geleen", "Soest", "Tilburg", "Utrecht", "Veenendaal",
  "Venlo", "Zwolle"
)

# Sort households by wealth within every FUA and assign each one to one of 100
# equally sized rank groups of its FUA. ungroup() afterwards so that the
# grouping by FUA does not leak into later steps.
wealth2013 <- wealth2013 %>%
  arrange(FUA, Wealth_per_capita) %>%
  group_by(FUA) %>%
  mutate(Wealth_per_capita_percentile = ntile(Wealth_per_capita, 100)) %>%
  ungroup()

# Remove households without a grid cell, then append "00" to the E and N
# numbers of the grid-cell code (i.e. multiply both by 100).
# NOTE(review): presumably this converts units of 100 m into metres - confirm.
wealth2013 <- wealth2013 %>%
  filter(VRLVIERKANT100M != "----------") %>%
  mutate(VRLVIERKANT100M = gsub("(E)(\\d+)(N)(\\d+)", "\\1\\200\\3\\400", VRLVIERKANT100M))

# Loop over each city (sigma = 500m)
# For every FUA: count households per grid cell and wealth percentile, split
# the population at each percentile threshold 1..99 into "up to" and "above",
# compute the spatial information theory index for every split and write the
# 99 values to one CSV file per city.
for (city in cities) {
  # Keep the current city name under `City`, as in the original script
  City <- city

  # Households of this FUA, reduced to grid cell and percentile rank.
  # ungroup() makes select() return exactly these two columns even when the
  # input is still grouped by FUA. Rows without a percentile rank are dropped:
  # they used to end up in an "NA" column that was sorted last and was
  # therefore counted in the "above" group of every split.
  df <- wealth2013 %>%
    ungroup() %>%
    filter(FUA == City, !is.na(Wealth_per_capita_percentile)) %>%
    select(VRLVIERKANT100M, Wealth_per_capita_percentile)

  # Nothing to compute for a city without households; skip it instead of
  # aborting the whole run
  if (nrow(df) == 0) {
    warning("No households found for FUA '", City, "' in 2013; skipping it.",
            call. = FALSE)
    next
  }

  # One row per grid cell, one column per percentile, values = household counts
  pivot <- df %>%
    count(VRLVIERKANT100M, Wealth_per_capita_percentile, name = "Population") %>%
    tidyr::pivot_wider(names_from = Wealth_per_capita_percentile, values_from = Population, values_fill = 0)

  # Parse the E/N numbers of the grid-cell codes into a coordinate matrix
  cell_ids <- pivot$VRLVIERKANT100M
  coords <- cbind(
    X = as.numeric(str_extract(cell_ids, "(?<=E)\\d+")),
    Y = as.numeric(str_extract(cell_ids, "(?<=N)\\d+"))
  )

  # Create SpatialPoints object
  Spatial_grid_cells_sp <- SpatialPoints(coords, proj4string = CRS("+init=epsg:28992"))

  # Make sure all 100 percentile columns exist and are in ascending order, so
  # that column i really is percentile i. A percentile can be absent when none
  # of its households has a valid grid cell in this city.
  percentile_cols <- as.character(1:100)
  for (col in setdiff(percentile_cols, colnames(pivot))) {
    pivot[[col]] <- 0L
  }
  counts <- as.matrix(pivot[, percentile_cols])

  # Households per grid cell over all percentiles
  cell_totals <- rowSums(counts)

  # Compute the index for every threshold between two percentiles
  indices <- seq(1, 99)
  SITI_values <- numeric(length(indices))
  for (i in indices) {
    # Households up to and including percentile i, and those above it
    sumUpToCurrentPerc <- rowSums(counts[, seq_len(i), drop = FALSE])
    group_counts <- data.frame(
      sumUpToCurrentPerc = sumUpToCurrentPerc,
      sumAboveCurrentPerc = cell_totals - sumUpToCurrentPerc
    )

    # Calculate SITI and keep only the `h` slot of the result
    SITI_values[i] <- spseg(x = Spatial_grid_cells_sp, data = group_counts, method = "information", smoothing = "kernel", sigma = 500, useC = FALSE)@h
  }

  # One row per threshold; Group is a factor ordered 1..99
  combined_data <- data.frame(
    Value = SITI_values,
    Group = factor(as.character(indices), levels = as.character(indices))
  )

  # Save the data
  write.csv(combined_data, file = paste0("Wealth_per_capita_segregation_500m_2013_", City, ".csv"), row.names = FALSE)
}

# Remove the 2013 data from the workspace before the next year is loaded
rm(wealth2013)








### Calculate Spatial Information Theory Index for wealth segregation in 2012 ###

# Load the data
wealth2012 <- read.csv("raw_data/database_2012.csv") # only if not already loaded

# Drop households whose wealth equals the missing-value code and compute the
# wealth per household member. filter() also drops rows where the comparison
# is NA; base `[` indexing would have turned those rows into all-NA rows.
# NOTE(review): INHAHL == 0 would give Inf/NaN here - confirm it cannot occur.
wealth2012 <- wealth2012 %>%
  filter(VEHW1000VERH != 99999999999) %>%
  mutate(Wealth_per_capita = VEHW1000VERH / INHAHL)

# Define all possible FUAs
cities <- c(
  "'s-Gravenhage", "'s-Hertogenbosch", "Alkmaar", "Almelo",
  "Alphen aan den Rijn", "Amersfoort", "Amsterdam", "Apeldoorn", "Arnhem",
  "Assen", "Bergen op Zoom", "Breda", "Deventer", "Ede", "Eindhoven",
  "Enschede", "Gouda", "Groningen", "Heerlen", "Leeuwarden", "Leiden",
  "Lelystad", "Maastricht", "Middelburg", "Nijmegen", "Oss", "Roosendaal",
  "Rotterdam", "Sittard-Geleen", "Soest", "Tilburg", "Utrecht", "Veenendaal",
  "Venlo", "Zwolle"
)

# Sort households by wealth within every FUA and assign each one to one of 100
# equally sized rank groups of its FUA. ungroup() afterwards so that the
# grouping by FUA does not leak into later steps.
wealth2012 <- wealth2012 %>%
  arrange(FUA, Wealth_per_capita) %>%
  group_by(FUA) %>%
  mutate(Wealth_per_capita_percentile = ntile(Wealth_per_capita, 100)) %>%
  ungroup()

# Remove households without a grid cell, then append "00" to the E and N
# numbers of the grid-cell code (i.e. multiply both by 100).
# NOTE(review): presumably this converts units of 100 m into metres - confirm.
wealth2012 <- wealth2012 %>%
  filter(VRLVIERKANT100M != "----------") %>%
  mutate(VRLVIERKANT100M = gsub("(E)(\\d+)(N)(\\d+)", "\\1\\200\\3\\400", VRLVIERKANT100M))

# Loop over each city (sigma = 500m)
# For every FUA: count households per grid cell and wealth percentile, split
# the population at each percentile threshold 1..99 into "up to" and "above",
# compute the spatial information theory index for every split and write the
# 99 values to one CSV file per city.
for (city in cities) {
  # Keep the current city name under `City`, as in the original script
  City <- city

  # Households of this FUA, reduced to grid cell and percentile rank.
  # ungroup() makes select() return exactly these two columns even when the
  # input is still grouped by FUA. Rows without a percentile rank are dropped:
  # they used to end up in an "NA" column that was sorted last and was
  # therefore counted in the "above" group of every split.
  df <- wealth2012 %>%
    ungroup() %>%
    filter(FUA == City, !is.na(Wealth_per_capita_percentile)) %>%
    select(VRLVIERKANT100M, Wealth_per_capita_percentile)

  # Nothing to compute for a city without households; skip it instead of
  # aborting the whole run
  if (nrow(df) == 0) {
    warning("No households found for FUA '", City, "' in 2012; skipping it.",
            call. = FALSE)
    next
  }

  # One row per grid cell, one column per percentile, values = household counts
  pivot <- df %>%
    count(VRLVIERKANT100M, Wealth_per_capita_percentile, name = "Population") %>%
    tidyr::pivot_wider(names_from = Wealth_per_capita_percentile, values_from = Population, values_fill = 0)

  # Parse the E/N numbers of the grid-cell codes into a coordinate matrix
  cell_ids <- pivot$VRLVIERKANT100M
  coords <- cbind(
    X = as.numeric(str_extract(cell_ids, "(?<=E)\\d+")),
    Y = as.numeric(str_extract(cell_ids, "(?<=N)\\d+"))
  )

  # Create SpatialPoints object
  Spatial_grid_cells_sp <- SpatialPoints(coords, proj4string = CRS("+init=epsg:28992"))

  # Make sure all 100 percentile columns exist and are in ascending order, so
  # that column i really is percentile i. A percentile can be absent when none
  # of its households has a valid grid cell in this city.
  percentile_cols <- as.character(1:100)
  for (col in setdiff(percentile_cols, colnames(pivot))) {
    pivot[[col]] <- 0L
  }
  counts <- as.matrix(pivot[, percentile_cols])

  # Households per grid cell over all percentiles
  cell_totals <- rowSums(counts)

  # Compute the index for every threshold between two percentiles
  indices <- seq(1, 99)
  SITI_values <- numeric(length(indices))
  for (i in indices) {
    # Households up to and including percentile i, and those above it
    sumUpToCurrentPerc <- rowSums(counts[, seq_len(i), drop = FALSE])
    group_counts <- data.frame(
      sumUpToCurrentPerc = sumUpToCurrentPerc,
      sumAboveCurrentPerc = cell_totals - sumUpToCurrentPerc
    )

    # Calculate SITI and keep only the `h` slot of the result
    SITI_values[i] <- spseg(x = Spatial_grid_cells_sp, data = group_counts, method = "information", smoothing = "kernel", sigma = 500, useC = FALSE)@h
  }

  # One row per threshold; Group is a factor ordered 1..99
  combined_data <- data.frame(
    Value = SITI_values,
    Group = factor(as.character(indices), levels = as.character(indices))
  )

  # Save the data
  write.csv(combined_data, file = paste0("Wealth_per_capita_segregation_500m_2012_", City, ".csv"), row.names = FALSE)
}

# Remove the 2012 data from the workspace before the next year is loaded
rm(wealth2012)


### Calculate Spatial Information Theory Index for wealth segregation in 2011 ###

# Load the data
wealth2011 <- read.csv("raw_data/database_2011.csv") # only if not already loaded

# Drop households whose wealth equals the missing-value code and compute the
# wealth per household member. filter() also drops rows where the comparison
# is NA; base `[` indexing would have turned those rows into all-NA rows.
# NOTE(review): INHAHL == 0 would give Inf/NaN here - confirm it cannot occur.
wealth2011 <- wealth2011 %>%
  filter(VEHW1000VERH != 99999999999) %>%
  mutate(Wealth_per_capita = VEHW1000VERH / INHAHL)

# Define all possible FUAs
cities <- c(
  "'s-Gravenhage", "'s-Hertogenbosch", "Alkmaar", "Almelo",
  "Alphen aan den Rijn", "Amersfoort", "Amsterdam", "Apeldoorn", "Arnhem",
  "Assen", "Bergen op Zoom", "Breda", "Deventer", "Ede", "Eindhoven",
  "Enschede", "Gouda", "Groningen", "Heerlen", "Leeuwarden", "Leiden",
  "Lelystad", "Maastricht", "Middelburg", "Nijmegen", "Oss", "Roosendaal",
  "Rotterdam", "Sittard-Geleen", "Soest", "Tilburg", "Utrecht", "Veenendaal",
  "Venlo", "Zwolle"
)

# Sort households by wealth within every FUA and assign each one to one of 100
# equally sized rank groups of its FUA. ungroup() afterwards so that the
# grouping by FUA does not leak into later steps.
wealth2011 <- wealth2011 %>%
  arrange(FUA, Wealth_per_capita) %>%
  group_by(FUA) %>%
  mutate(Wealth_per_capita_percentile = ntile(Wealth_per_capita, 100)) %>%
  ungroup()

# Remove households without a grid cell, then append "00" to the E and N
# numbers of the grid-cell code (i.e. multiply both by 100).
# NOTE(review): presumably this converts units of 100 m into metres - confirm.
wealth2011 <- wealth2011 %>%
  filter(VRLVIERKANT100M != "----------") %>%
  mutate(VRLVIERKANT100M = gsub("(E)(\\d+)(N)(\\d+)", "\\1\\200\\3\\400", VRLVIERKANT100M))

# Loop over each city (sigma = 500m)
#
# For every FUA: count households per grid cell and wealth percentile, split
# the counts at each percentile threshold into an "up to" and an "above"
# group, compute the spatial information theory index for each split (slot `h`
# of the object returned by spseg()), and write one CSV per city.

# Number of percentile groups; must equal the ntile() bucket count used above
n_percentiles <- 100
# Thresholds 1..99: the last percentile has nothing above it
indices <- seq_len(n_percentiles - 1)

for (city in cities) {
  # Set the variable city for the given city
  City <- city

  # Households of the FUA we are interested in. ungroup() first so select()
  # does not silently re-add a grouping column.
  df <- wealth2011 %>%
    ungroup() %>%
    filter(FUA == City) %>%
    select(VRLVIERKANT100M, Wealth_per_capita_percentile)

  # Without this guard an FUA with no households crashes the whole run
  if (nrow(df) == 0) {
    warning(
      "No households found for FUA '", City, "' in 2011; skipping.",
      call. = FALSE
    )
    next
  }

  # Pivot table: one row per grid cell, one column per wealth percentile
  pivot <- df %>%
    group_by(VRLVIERKANT100M, Wealth_per_capita_percentile) %>%
    summarise(Population = n(), .groups = "drop") %>%
    tidyr::pivot_wider(
      names_from = Wealth_per_capita_percentile,
      values_from = Population,
      values_fill = 0
    )

  # Parse the coordinates out of the grid-cell code ("E<digits>N<digits>")
  grid_cells <- data.frame(
    x = as.numeric(sub("E", "", str_extract(pivot$VRLVIERKANT100M, "E\\d+"))),
    y = as.numeric(sub("N", "", str_extract(pivot$VRLVIERKANT100M, "N\\d+")))
  )

  # Create a spatial object for grid cells
  Spatial_grid_cells <- st_as_sf(grid_cells, coords = c("x", "y"), crs = 28992)
  coords <- st_coordinates(Spatial_grid_cells)

  # Create SpatialPoints object
  Spatial_grid_cells_sp <- SpatialPoints(coords, proj4string = CRS("+init=epsg:28992"))

  # Keep only the wealth percentile counts
  pivot$VRLVIERKANT100M <- NULL

  # A percentile with no located household in this city has no column yet.
  # Add it as zeros so that column position i always means percentile i.
  for (p in setdiff(as.character(seq_len(n_percentiles)), colnames(pivot))) {
    pivot[[p]] <- 0L
  }

  # Order columns by percentile number.
  # NOTE(review): households with an NA percentile end up in a last "NA"
  # column and are therefore counted in the "above" group - confirm intended.
  pivot <- pivot[, order(as.numeric(colnames(pivot)))]
  counts <- as.matrix(pivot)
  n_cols <- ncol(counts)

  # SITI for every threshold: percentiles 1..i versus percentiles above i
  SITI_values <- vapply(indices, function(i) {
    split_counts <- data.frame(
      sumUpToCurrentPerc = rowSums(
        counts[, seq_len(i), drop = FALSE], na.rm = TRUE
      ),
      sumAboveCurrentPerc = rowSums(
        counts[, (i + 1):n_cols, drop = FALSE], na.rm = TRUE
      )
    )
    spseg(
      x = Spatial_grid_cells_sp, data = split_counts,
      method = "information", smoothing = "kernel",
      sigma = 500, useC = FALSE
    )@h
  }, numeric(1))

  # One row per threshold; Group is a factor ordered 1..99 for plotting
  combined_data <- data.frame(
    Value = SITI_values,
    Group = factor(as.character(indices), levels = as.character(indices))
  )

  # Save the data
  write.csv(
    combined_data,
    file = paste0("Wealth_per_capita_segregation_500m_2011_", City, ".csv"),
    row.names = FALSE
  )
}


# Delete data for enabling further operations
rm(wealth2011)



### Calculate Spatial Information Theory Index for wealth segregation in 2010 ###

# Load the data
wealth2010 <- read.csv("raw_data/database_2010.csv") # only if not already loaded

# Delete missing values of wealth (coded as 99999999999). The is.na() guard
# matters: subsetting with an NA condition inserts all-NA rows instead of
# dropping the household.
wealth2010 <- wealth2010[
  !is.na(wealth2010$VEHW1000VERH) & wealth2010$VEHW1000VERH != 99999999999,
]

# Calculate wealth per capita in each household
wealth2010$Wealth_per_capita <- wealth2010$VEHW1000VERH / wealth2010$BVRAHL

# Define all possible FUAs
cities <- c(
  "'s-Gravenhage", "'s-Hertogenbosch", "Alkmaar", "Almelo",
  "Alphen aan den Rijn", "Amersfoort", "Amsterdam", "Apeldoorn", "Arnhem",
  "Assen", "Bergen op Zoom", "Breda", "Deventer", "Ede", "Eindhoven",
  "Enschede", "Gouda", "Groningen", "Heerlen", "Leeuwarden", "Leiden",
  "Lelystad", "Maastricht", "Middelburg", "Nijmegen", "Oss", "Roosendaal",
  "Rotterdam", "Sittard-Geleen", "Soest", "Tilburg", "Utrecht", "Veenendaal",
  "Venlo", "Zwolle"
)

# Rank households by wealth within their FUA and clean the grid-cell codes.
# Percentiles are assigned BEFORE households without a grid cell are dropped,
# so ranks refer to every household with known wealth in the FUA.
wealth2010 <- wealth2010 %>%
  # Order values according to their wealth within every FUA
  arrange(FUA, Wealth_per_capita) %>%
  # Wealth percentile (1-100) every household belongs to in its FUA
  group_by(FUA) %>%
  mutate(Wealth_per_capita_percentile = ntile(Wealth_per_capita, 100)) %>%
  # Drop the grouping so it does not leak into the steps below
  ungroup() %>%
  # Clean rows of missing data ("----------" marks an unknown grid cell)
  filter(VRLVIERKANT100M != "----------") %>%
  # Append "00" to the easting and the northing ("\\2" followed by "00"), i.e.
  # multiply both by 100 so the coordinates are expressed in meters.
  # NOTE(review): presumably the raw codes are in units of 100 m, as the
  # column name suggests - confirm.
  mutate(
    VRLVIERKANT100M = gsub("(E)(\\d+)(N)(\\d+)","\\1\\200\\3\\400", VRLVIERKANT100M)
  )

# Loop over each city (sigma = 500m)
#
# For every FUA: count households per grid cell and wealth percentile, split
# the counts at each percentile threshold into an "up to" and an "above"
# group, compute the spatial information theory index for each split (slot `h`
# of the object returned by spseg()), and write one CSV per city.

# Number of percentile groups; must equal the ntile() bucket count used above
n_percentiles <- 100
# Thresholds 1..99: the last percentile has nothing above it
indices <- seq_len(n_percentiles - 1)

for (city in cities) {
  # Set the variable city for the given city
  City <- city

  # Households of the FUA we are interested in. ungroup() first so select()
  # does not silently re-add a grouping column.
  df <- wealth2010 %>%
    ungroup() %>%
    filter(FUA == City) %>%
    select(VRLVIERKANT100M, Wealth_per_capita_percentile)

  # Without this guard an FUA with no households crashes the whole run
  if (nrow(df) == 0) {
    warning(
      "No households found for FUA '", City, "' in 2010; skipping.",
      call. = FALSE
    )
    next
  }

  # Pivot table: one row per grid cell, one column per wealth percentile
  pivot <- df %>%
    group_by(VRLVIERKANT100M, Wealth_per_capita_percentile) %>%
    summarise(Population = n(), .groups = "drop") %>%
    tidyr::pivot_wider(
      names_from = Wealth_per_capita_percentile,
      values_from = Population,
      values_fill = 0
    )

  # Parse the coordinates out of the grid-cell code ("E<digits>N<digits>")
  grid_cells <- data.frame(
    x = as.numeric(sub("E", "", str_extract(pivot$VRLVIERKANT100M, "E\\d+"))),
    y = as.numeric(sub("N", "", str_extract(pivot$VRLVIERKANT100M, "N\\d+")))
  )

  # Create a spatial object for grid cells
  Spatial_grid_cells <- st_as_sf(grid_cells, coords = c("x", "y"), crs = 28992)
  coords <- st_coordinates(Spatial_grid_cells)

  # Create SpatialPoints object
  Spatial_grid_cells_sp <- SpatialPoints(coords, proj4string = CRS("+init=epsg:28992"))

  # Keep only the wealth percentile counts
  pivot$VRLVIERKANT100M <- NULL

  # A percentile with no located household in this city has no column yet.
  # Add it as zeros so that column position i always means percentile i.
  for (p in setdiff(as.character(seq_len(n_percentiles)), colnames(pivot))) {
    pivot[[p]] <- 0L
  }

  # Order columns by percentile number.
  # NOTE(review): households with an NA percentile end up in a last "NA"
  # column and are therefore counted in the "above" group - confirm intended.
  pivot <- pivot[, order(as.numeric(colnames(pivot)))]
  counts <- as.matrix(pivot)
  n_cols <- ncol(counts)

  # SITI for every threshold: percentiles 1..i versus percentiles above i
  SITI_values <- vapply(indices, function(i) {
    split_counts <- data.frame(
      sumUpToCurrentPerc = rowSums(
        counts[, seq_len(i), drop = FALSE], na.rm = TRUE
      ),
      sumAboveCurrentPerc = rowSums(
        counts[, (i + 1):n_cols, drop = FALSE], na.rm = TRUE
      )
    )
    spseg(
      x = Spatial_grid_cells_sp, data = split_counts,
      method = "information", smoothing = "kernel",
      sigma = 500, useC = FALSE
    )@h
  }, numeric(1))

  # One row per threshold; Group is a factor ordered 1..99 for plotting
  combined_data <- data.frame(
    Value = SITI_values,
    Group = factor(as.character(indices), levels = as.character(indices))
  )

  # Save the data
  write.csv(
    combined_data,
    file = paste0("Wealth_per_capita_segregation_500m_2010_", City, ".csv"),
    row.names = FALSE
  )
}


# Delete data for enabling further operations
rm(wealth2010)




### Calculate Spatial Information Theory Index for wealth segregation in 2009 ###

# Load the data
wealth2009 <- read.csv("raw_data/database_2009.csv") # only if not already loaded

# Delete missing values of wealth (coded as 99999999999). The is.na() guard
# matters: subsetting with an NA condition inserts all-NA rows instead of
# dropping the household.
wealth2009 <- wealth2009[
  !is.na(wealth2009$VEHW1000VERH) & wealth2009$VEHW1000VERH != 99999999999,
]

# Calculate wealth per capita in each household
wealth2009$Wealth_per_capita <- wealth2009$VEHW1000VERH / wealth2009$BVRAHL

# Define all possible FUAs
cities <- c(
  "'s-Gravenhage", "'s-Hertogenbosch", "Alkmaar", "Almelo",
  "Alphen aan den Rijn", "Amersfoort", "Amsterdam", "Apeldoorn", "Arnhem",
  "Assen", "Bergen op Zoom", "Breda", "Deventer", "Ede", "Eindhoven",
  "Enschede", "Gouda", "Groningen", "Heerlen", "Leeuwarden", "Leiden",
  "Lelystad", "Maastricht", "Middelburg", "Nijmegen", "Oss", "Roosendaal",
  "Rotterdam", "Sittard-Geleen", "Soest", "Tilburg", "Utrecht", "Veenendaal",
  "Venlo", "Zwolle"
)

# Rank households by wealth within their FUA and clean the grid-cell codes.
# Percentiles are assigned BEFORE households without a grid cell are dropped,
# so ranks refer to every household with known wealth in the FUA.
wealth2009 <- wealth2009 %>%
  # Order values according to their wealth within every FUA
  arrange(FUA, Wealth_per_capita) %>%
  # Wealth percentile (1-100) every household belongs to in its FUA
  group_by(FUA) %>%
  mutate(Wealth_per_capita_percentile = ntile(Wealth_per_capita, 100)) %>%
  # Drop the grouping so it does not leak into the steps below
  ungroup() %>%
  # Clean rows of missing data ("----------" marks an unknown grid cell)
  filter(VRLVIERKANT100M != "----------") %>%
  # Append "00" to the easting and the northing ("\\2" followed by "00"), i.e.
  # multiply both by 100 so the coordinates are expressed in meters.
  # NOTE(review): presumably the raw codes are in units of 100 m, as the
  # column name suggests - confirm.
  mutate(
    VRLVIERKANT100M = gsub("(E)(\\d+)(N)(\\d+)","\\1\\200\\3\\400", VRLVIERKANT100M)
  )

# Loop over each city (sigma = 500m)
#
# For every FUA: count households per grid cell and wealth percentile, split
# the counts at each percentile threshold into an "up to" and an "above"
# group, compute the spatial information theory index for each split (slot `h`
# of the object returned by spseg()), and write one CSV per city.

# Number of percentile groups; must equal the ntile() bucket count used above
n_percentiles <- 100
# Thresholds 1..99: the last percentile has nothing above it
indices <- seq_len(n_percentiles - 1)

for (city in cities) {
  # Set the variable city for the given city
  City <- city

  # Households of the FUA we are interested in. ungroup() first so select()
  # does not silently re-add a grouping column.
  df <- wealth2009 %>%
    ungroup() %>%
    filter(FUA == City) %>%
    select(VRLVIERKANT100M, Wealth_per_capita_percentile)

  # Without this guard an FUA with no households crashes the whole run
  if (nrow(df) == 0) {
    warning(
      "No households found for FUA '", City, "' in 2009; skipping.",
      call. = FALSE
    )
    next
  }

  # Pivot table: one row per grid cell, one column per wealth percentile
  pivot <- df %>%
    group_by(VRLVIERKANT100M, Wealth_per_capita_percentile) %>%
    summarise(Population = n(), .groups = "drop") %>%
    tidyr::pivot_wider(
      names_from = Wealth_per_capita_percentile,
      values_from = Population,
      values_fill = 0
    )

  # Parse the coordinates out of the grid-cell code ("E<digits>N<digits>")
  grid_cells <- data.frame(
    x = as.numeric(sub("E", "", str_extract(pivot$VRLVIERKANT100M, "E\\d+"))),
    y = as.numeric(sub("N", "", str_extract(pivot$VRLVIERKANT100M, "N\\d+")))
  )

  # Create a spatial object for grid cells
  Spatial_grid_cells <- st_as_sf(grid_cells, coords = c("x", "y"), crs = 28992)
  coords <- st_coordinates(Spatial_grid_cells)

  # Create SpatialPoints object
  Spatial_grid_cells_sp <- SpatialPoints(coords, proj4string = CRS("+init=epsg:28992"))

  # Keep only the wealth percentile counts
  pivot$VRLVIERKANT100M <- NULL

  # A percentile with no located household in this city has no column yet.
  # Add it as zeros so that column position i always means percentile i.
  for (p in setdiff(as.character(seq_len(n_percentiles)), colnames(pivot))) {
    pivot[[p]] <- 0L
  }

  # Order columns by percentile number.
  # NOTE(review): households with an NA percentile end up in a last "NA"
  # column and are therefore counted in the "above" group - confirm intended.
  pivot <- pivot[, order(as.numeric(colnames(pivot)))]
  counts <- as.matrix(pivot)
  n_cols <- ncol(counts)

  # SITI for every threshold: percentiles 1..i versus percentiles above i
  SITI_values <- vapply(indices, function(i) {
    split_counts <- data.frame(
      sumUpToCurrentPerc = rowSums(
        counts[, seq_len(i), drop = FALSE], na.rm = TRUE
      ),
      sumAboveCurrentPerc = rowSums(
        counts[, (i + 1):n_cols, drop = FALSE], na.rm = TRUE
      )
    )
    spseg(
      x = Spatial_grid_cells_sp, data = split_counts,
      method = "information", smoothing = "kernel",
      sigma = 500, useC = FALSE
    )@h
  }, numeric(1))

  # One row per threshold; Group is a factor ordered 1..99 for plotting
  combined_data <- data.frame(
    Value = SITI_values,
    Group = factor(as.character(indices), levels = as.character(indices))
  )

  # Save the data
  write.csv(
    combined_data,
    file = paste0("Wealth_per_capita_segregation_500m_2009_", City, ".csv"),
    row.names = FALSE
  )
}

# Delete data for enabling further operations
rm(wealth2009)




### Calculate Spatial Information Theory Index for wealth segregation in 2008 ###

# Load the data
wealth2008 <- read.csv("raw_data/database_2008.csv") # only if not already loaded

# Delete missing values of wealth (coded as 99999999999). The is.na() guard
# matters: subsetting with an NA condition inserts all-NA rows instead of
# dropping the household.
wealth2008 <- wealth2008[
  !is.na(wealth2008$VEHW1000VERH) & wealth2008$VEHW1000VERH != 99999999999,
]

# Calculate wealth per capita in each household
wealth2008$Wealth_per_capita <- wealth2008$VEHW1000VERH / wealth2008$BVRAHL

# Define all possible FUAs
cities <- c(
  "'s-Gravenhage", "'s-Hertogenbosch", "Alkmaar", "Almelo",
  "Alphen aan den Rijn", "Amersfoort", "Amsterdam", "Apeldoorn", "Arnhem",
  "Assen", "Bergen op Zoom", "Breda", "Deventer", "Ede", "Eindhoven",
  "Enschede", "Gouda", "Groningen", "Heerlen", "Leeuwarden", "Leiden",
  "Lelystad", "Maastricht", "Middelburg", "Nijmegen", "Oss", "Roosendaal",
  "Rotterdam", "Sittard-Geleen", "Soest", "Tilburg", "Utrecht", "Veenendaal",
  "Venlo", "Zwolle"
)

# Rank households by wealth within their FUA and clean the grid-cell codes.
# Percentiles are assigned BEFORE households without a grid cell are dropped,
# so ranks refer to every household with known wealth in the FUA.
wealth2008 <- wealth2008 %>%
  # Order values according to their wealth within every FUA
  arrange(FUA, Wealth_per_capita) %>%
  # Wealth percentile (1-100) every household belongs to in its FUA
  group_by(FUA) %>%
  mutate(Wealth_per_capita_percentile = ntile(Wealth_per_capita, 100)) %>%
  # Drop the grouping so it does not leak into the steps below
  ungroup() %>%
  # Clean rows of missing data ("----------" marks an unknown grid cell)
  filter(VRLVIERKANT100M != "----------") %>%
  # Append "00" to the easting and the northing ("\\2" followed by "00"), i.e.
  # multiply both by 100 so the coordinates are expressed in meters.
  # NOTE(review): presumably the raw codes are in units of 100 m, as the
  # column name suggests - confirm.
  mutate(
    VRLVIERKANT100M = gsub("(E)(\\d+)(N)(\\d+)","\\1\\200\\3\\400", VRLVIERKANT100M)
  )

# Loop over each city (sigma = 500m)
#
# For every FUA: count households per grid cell and wealth percentile, split
# the counts at each percentile threshold into an "up to" and an "above"
# group, compute the spatial information theory index for each split (slot `h`
# of the object returned by spseg()), and write one CSV per city.

# Number of percentile groups; must equal the ntile() bucket count used above
n_percentiles <- 100
# Thresholds 1..99: the last percentile has nothing above it
indices <- seq_len(n_percentiles - 1)

for (city in cities) {
  # Set the variable city for the given city
  City <- city

  # Households of the FUA we are interested in. ungroup() first so select()
  # does not silently re-add a grouping column.
  df <- wealth2008 %>%
    ungroup() %>%
    filter(FUA == City) %>%
    select(VRLVIERKANT100M, Wealth_per_capita_percentile)

  # Without this guard an FUA with no households crashes the whole run
  if (nrow(df) == 0) {
    warning(
      "No households found for FUA '", City, "' in 2008; skipping.",
      call. = FALSE
    )
    next
  }

  # Pivot table: one row per grid cell, one column per wealth percentile
  pivot <- df %>%
    group_by(VRLVIERKANT100M, Wealth_per_capita_percentile) %>%
    summarise(Population = n(), .groups = "drop") %>%
    tidyr::pivot_wider(
      names_from = Wealth_per_capita_percentile,
      values_from = Population,
      values_fill = 0
    )

  # Parse the coordinates out of the grid-cell code ("E<digits>N<digits>")
  grid_cells <- data.frame(
    x = as.numeric(sub("E", "", str_extract(pivot$VRLVIERKANT100M, "E\\d+"))),
    y = as.numeric(sub("N", "", str_extract(pivot$VRLVIERKANT100M, "N\\d+")))
  )

  # Create a spatial object for grid cells
  Spatial_grid_cells <- st_as_sf(grid_cells, coords = c("x", "y"), crs = 28992)
  coords <- st_coordinates(Spatial_grid_cells)

  # Create SpatialPoints object
  Spatial_grid_cells_sp <- SpatialPoints(coords, proj4string = CRS("+init=epsg:28992"))

  # Keep only the wealth percentile counts
  pivot$VRLVIERKANT100M <- NULL

  # A percentile with no located household in this city has no column yet.
  # Add it as zeros so that column position i always means percentile i.
  for (p in setdiff(as.character(seq_len(n_percentiles)), colnames(pivot))) {
    pivot[[p]] <- 0L
  }

  # Order columns by percentile number.
  # NOTE(review): households with an NA percentile end up in a last "NA"
  # column and are therefore counted in the "above" group - confirm intended.
  pivot <- pivot[, order(as.numeric(colnames(pivot)))]
  counts <- as.matrix(pivot)
  n_cols <- ncol(counts)

  # SITI for every threshold: percentiles 1..i versus percentiles above i
  SITI_values <- vapply(indices, function(i) {
    split_counts <- data.frame(
      sumUpToCurrentPerc = rowSums(
        counts[, seq_len(i), drop = FALSE], na.rm = TRUE
      ),
      sumAboveCurrentPerc = rowSums(
        counts[, (i + 1):n_cols, drop = FALSE], na.rm = TRUE
      )
    )
    spseg(
      x = Spatial_grid_cells_sp, data = split_counts,
      method = "information", smoothing = "kernel",
      sigma = 500, useC = FALSE
    )@h
  }, numeric(1))

  # One row per threshold; Group is a factor ordered 1..99 for plotting
  combined_data <- data.frame(
    Value = SITI_values,
    Group = factor(as.character(indices), levels = as.character(indices))
  )

  # Save the data
  write.csv(
    combined_data,
    file = paste0("Wealth_per_capita_segregation_500m_2008_", City, ".csv"),
    row.names = FALSE
  )
}

# Delete data for enabling further operations
rm(wealth2008)




### Calculate Spatial Information Theory Index for wealth segregation in 2007 ###

# Load the data
wealth2007 <- read.csv("raw_data/database_2007.csv") # only if not already loaded

# Delete missing values of wealth (coded as 99999999999). The is.na() guard
# matters: subsetting with an NA condition inserts all-NA rows instead of
# dropping the household.
wealth2007 <- wealth2007[
  !is.na(wealth2007$VEHW1000VERH) & wealth2007$VEHW1000VERH != 99999999999,
]

# Calculate wealth per capita in each household
wealth2007$Wealth_per_capita <- wealth2007$VEHW1000VERH / wealth2007$BVRAHL

# Define all possible FUAs
cities <- c(
  "'s-Gravenhage", "'s-Hertogenbosch", "Alkmaar", "Almelo",
  "Alphen aan den Rijn", "Amersfoort", "Amsterdam", "Apeldoorn", "Arnhem",
  "Assen", "Bergen op Zoom", "Breda", "Deventer", "Ede", "Eindhoven",
  "Enschede", "Gouda", "Groningen", "Heerlen", "Leeuwarden", "Leiden",
  "Lelystad", "Maastricht", "Middelburg", "Nijmegen", "Oss", "Roosendaal",
  "Rotterdam", "Sittard-Geleen", "Soest", "Tilburg", "Utrecht", "Veenendaal",
  "Venlo", "Zwolle"
)

# Rank households by wealth within their FUA and clean the grid-cell codes.
# Percentiles are assigned BEFORE households without a grid cell are dropped,
# so ranks refer to every household with known wealth in the FUA.
wealth2007 <- wealth2007 %>%
  # Order values according to their wealth within every FUA
  arrange(FUA, Wealth_per_capita) %>%
  # Wealth percentile (1-100) every household belongs to in its FUA
  group_by(FUA) %>%
  mutate(Wealth_per_capita_percentile = ntile(Wealth_per_capita, 100)) %>%
  # Drop the grouping so it does not leak into the steps below
  ungroup() %>%
  # Clean rows of missing data ("----------" marks an unknown grid cell)
  filter(VRLVIERKANT100M != "----------") %>%
  # Append "00" to the easting and the northing ("\\2" followed by "00"), i.e.
  # multiply both by 100 so the coordinates are expressed in meters.
  # NOTE(review): presumably the raw codes are in units of 100 m, as the
  # column name suggests - confirm.
  mutate(
    VRLVIERKANT100M = gsub("(E)(\\d+)(N)(\\d+)","\\1\\200\\3\\400", VRLVIERKANT100M)
  )

# Loop over each city (sigma = 500m)
#
# For every FUA: count households per grid cell and wealth percentile, split
# the counts at each percentile threshold into an "up to" and an "above"
# group, compute the spatial information theory index for each split (slot `h`
# of the object returned by spseg()), and write one CSV per city.

# Number of percentile groups; must equal the ntile() bucket count used above
n_percentiles <- 100
# Thresholds 1..99: the last percentile has nothing above it
indices <- seq_len(n_percentiles - 1)

for (city in cities) {
  # Set the variable city for the given city
  City <- city

  # Households of the FUA we are interested in. ungroup() first so select()
  # does not silently re-add a grouping column.
  df <- wealth2007 %>%
    ungroup() %>%
    filter(FUA == City) %>%
    select(VRLVIERKANT100M, Wealth_per_capita_percentile)

  # Without this guard an FUA with no households crashes the whole run
  if (nrow(df) == 0) {
    warning(
      "No households found for FUA '", City, "' in 2007; skipping.",
      call. = FALSE
    )
    next
  }

  # Pivot table: one row per grid cell, one column per wealth percentile
  pivot <- df %>%
    group_by(VRLVIERKANT100M, Wealth_per_capita_percentile) %>%
    summarise(Population = n(), .groups = "drop") %>%
    tidyr::pivot_wider(
      names_from = Wealth_per_capita_percentile,
      values_from = Population,
      values_fill = 0
    )

  # Parse the coordinates out of the grid-cell code ("E<digits>N<digits>")
  grid_cells <- data.frame(
    x = as.numeric(sub("E", "", str_extract(pivot$VRLVIERKANT100M, "E\\d+"))),
    y = as.numeric(sub("N", "", str_extract(pivot$VRLVIERKANT100M, "N\\d+")))
  )

  # Create a spatial object for grid cells
  Spatial_grid_cells <- st_as_sf(grid_cells, coords = c("x", "y"), crs = 28992)
  coords <- st_coordinates(Spatial_grid_cells)

  # Create SpatialPoints object
  Spatial_grid_cells_sp <- SpatialPoints(coords, proj4string = CRS("+init=epsg:28992"))

  # Keep only the wealth percentile counts
  pivot$VRLVIERKANT100M <- NULL

  # A percentile with no located household in this city has no column yet.
  # Add it as zeros so that column position i always means percentile i.
  for (p in setdiff(as.character(seq_len(n_percentiles)), colnames(pivot))) {
    pivot[[p]] <- 0L
  }

  # Order columns by percentile number.
  # NOTE(review): households with an NA percentile end up in a last "NA"
  # column and are therefore counted in the "above" group - confirm intended.
  pivot <- pivot[, order(as.numeric(colnames(pivot)))]
  counts <- as.matrix(pivot)
  n_cols <- ncol(counts)

  # SITI for every threshold: percentiles 1..i versus percentiles above i
  SITI_values <- vapply(indices, function(i) {
    split_counts <- data.frame(
      sumUpToCurrentPerc = rowSums(
        counts[, seq_len(i), drop = FALSE], na.rm = TRUE
      ),
      sumAboveCurrentPerc = rowSums(
        counts[, (i + 1):n_cols, drop = FALSE], na.rm = TRUE
      )
    )
    spseg(
      x = Spatial_grid_cells_sp, data = split_counts,
      method = "information", smoothing = "kernel",
      sigma = 500, useC = FALSE
    )@h
  }, numeric(1))

  # One row per threshold; Group is a factor ordered 1..99 for plotting
  combined_data <- data.frame(
    Value = SITI_values,
    Group = factor(as.character(indices), levels = as.character(indices))
  )

  # Save the data
  write.csv(
    combined_data,
    file = paste0("Wealth_per_capita_segregation_500m_2007_", City, ".csv"),
    row.names = FALSE
  )
}

# Delete data for enabling further operations
rm(wealth2007)



### Calculate Spatial Information Theory Index for wealth segregation in 2006 ###

# Load the data
wealth2006 <- read.csv("raw_data/database_2006.csv") # only if not already loaded

# Delete missing values of wealth (coded as 99999999999). The is.na() guard
# matters: subsetting with an NA condition inserts all-NA rows instead of
# dropping the household.
wealth2006 <- wealth2006[
  !is.na(wealth2006$VEHW1000VERH) & wealth2006$VEHW1000VERH != 99999999999,
]

# Calculate wealth per capita in each household
wealth2006$Wealth_per_capita <- wealth2006$VEHW1000VERH / wealth2006$BVRAHL

# Define all possible FUAs
cities <- c(
  "'s-Gravenhage", "'s-Hertogenbosch", "Alkmaar", "Almelo",
  "Alphen aan den Rijn", "Amersfoort", "Amsterdam", "Apeldoorn", "Arnhem",
  "Assen", "Bergen op Zoom", "Breda", "Deventer", "Ede", "Eindhoven",
  "Enschede", "Gouda", "Groningen", "Heerlen", "Leeuwarden", "Leiden",
  "Lelystad", "Maastricht", "Middelburg", "Nijmegen", "Oss", "Roosendaal",
  "Rotterdam", "Sittard-Geleen", "Soest", "Tilburg", "Utrecht", "Veenendaal",
  "Venlo", "Zwolle"
)

# Rank households by wealth within their FUA and clean the grid-cell codes.
# Percentiles are assigned BEFORE households without a grid cell are dropped,
# so ranks refer to every household with known wealth in the FUA.
wealth2006 <- wealth2006 %>%
  # Order values according to their wealth within every FUA
  arrange(FUA, Wealth_per_capita) %>%
  # Wealth percentile (1-100) every household belongs to in its FUA
  group_by(FUA) %>%
  mutate(Wealth_per_capita_percentile = ntile(Wealth_per_capita, 100)) %>%
  # Drop the grouping so it does not leak into the steps below
  ungroup() %>%
  # Clean rows of missing data ("----------" marks an unknown grid cell)
  filter(VRLVIERKANT100M != "----------") %>%
  # Append "00" to the easting and the northing ("\\2" followed by "00"), i.e.
  # multiply both by 100 so the coordinates are expressed in meters.
  # NOTE(review): presumably the raw codes are in units of 100 m, as the
  # column name suggests - confirm.
  mutate(
    VRLVIERKANT100M = gsub("(E)(\\d+)(N)(\\d+)","\\1\\200\\3\\400", VRLVIERKANT100M)
  )

# Loop over each city (sigma = 500m)
#
# For every FUA: count households per grid cell and wealth percentile, split
# the counts at each percentile threshold into an "up to" and an "above"
# group, compute the spatial information theory index for each split (slot `h`
# of the object returned by spseg()), and write one CSV per city.

# Number of percentile groups; must equal the ntile() bucket count used above
n_percentiles <- 100
# Thresholds 1..99: the last percentile has nothing above it
indices <- seq_len(n_percentiles - 1)

for (city in cities) {
  # Set the variable city for the given city
  City <- city

  # Households of the FUA we are interested in. ungroup() first so select()
  # does not silently re-add a grouping column.
  df <- wealth2006 %>%
    ungroup() %>%
    filter(FUA == City) %>%
    select(VRLVIERKANT100M, Wealth_per_capita_percentile)

  # Without this guard an FUA with no households crashes the whole run
  if (nrow(df) == 0) {
    warning(
      "No households found for FUA '", City, "' in 2006; skipping.",
      call. = FALSE
    )
    next
  }

  # Pivot table: one row per grid cell, one column per wealth percentile
  pivot <- df %>%
    group_by(VRLVIERKANT100M, Wealth_per_capita_percentile) %>%
    summarise(Population = n(), .groups = "drop") %>%
    tidyr::pivot_wider(
      names_from = Wealth_per_capita_percentile,
      values_from = Population,
      values_fill = 0
    )

  # Parse the coordinates out of the grid-cell code ("E<digits>N<digits>")
  grid_cells <- data.frame(
    x = as.numeric(sub("E", "", str_extract(pivot$VRLVIERKANT100M, "E\\d+"))),
    y = as.numeric(sub("N", "", str_extract(pivot$VRLVIERKANT100M, "N\\d+")))
  )

  # Create a spatial object for grid cells
  Spatial_grid_cells <- st_as_sf(grid_cells, coords = c("x", "y"), crs = 28992)
  coords <- st_coordinates(Spatial_grid_cells)

  # Create SpatialPoints object
  Spatial_grid_cells_sp <- SpatialPoints(coords, proj4string = CRS("+init=epsg:28992"))

  # Keep only the wealth percentile counts
  pivot$VRLVIERKANT100M <- NULL

  # A percentile with no located household in this city has no column yet.
  # Add it as zeros so that column position i always means percentile i.
  for (p in setdiff(as.character(seq_len(n_percentiles)), colnames(pivot))) {
    pivot[[p]] <- 0L
  }

  # Order columns by percentile number.
  # NOTE(review): households with an NA percentile end up in a last "NA"
  # column and are therefore counted in the "above" group - confirm intended.
  pivot <- pivot[, order(as.numeric(colnames(pivot)))]
  counts <- as.matrix(pivot)
  n_cols <- ncol(counts)

  # SITI for every threshold: percentiles 1..i versus percentiles above i
  SITI_values <- vapply(indices, function(i) {
    split_counts <- data.frame(
      sumUpToCurrentPerc = rowSums(
        counts[, seq_len(i), drop = FALSE], na.rm = TRUE
      ),
      sumAboveCurrentPerc = rowSums(
        counts[, (i + 1):n_cols, drop = FALSE], na.rm = TRUE
      )
    )
    spseg(
      x = Spatial_grid_cells_sp, data = split_counts,
      method = "information", smoothing = "kernel",
      sigma = 500, useC = FALSE
    )@h
  }, numeric(1))

  # One row per threshold; Group is a factor ordered 1..99 for plotting
  combined_data <- data.frame(
    Value = SITI_values,
    Group = factor(as.character(indices), levels = as.character(indices))
  )

  # Save the data
  write.csv(
    combined_data,
    file = paste0("Wealth_per_capita_segregation_500m_2006_", City, ".csv"),
    row.names = FALSE
  )
}

# Delete data for enabling further operations
rm(wealth2006)





#### Local environment calculated  using a 4000m radius ####


### Calculate Spatial Information Theory Index for wealth segregation in 2022 ###

# Load the data
wealth2022 <- read.csv("raw_data/database_2022.csv") # only if not already loaded

# Delete missing values of wealth (coded as 99999999999). The is.na() guard
# matters: subsetting with an NA condition inserts all-NA rows instead of
# dropping the household.
wealth2022 <- wealth2022[
  !is.na(wealth2022$VEHW1000VERH) & wealth2022$VEHW1000VERH != 99999999999,
]

# Calculate wealth per capita in each household
wealth2022$Wealth_per_capita <- wealth2022$VEHW1000VERH / wealth2022$INHAHL

# Define all possible FUAs
cities <- c(
  "'s-Gravenhage", "'s-Hertogenbosch", "Alkmaar", "Almelo",
  "Alphen aan den Rijn", "Amersfoort", "Amsterdam", "Apeldoorn", "Arnhem",
  "Assen", "Bergen op Zoom", "Breda", "Deventer", "Ede", "Eindhoven",
  "Enschede", "Gouda", "Groningen", "Heerlen", "Leeuwarden", "Leiden",
  "Lelystad", "Maastricht", "Middelburg", "Nijmegen", "Oss", "Roosendaal",
  "Rotterdam", "Sittard-Geleen", "Soest", "Tilburg", "Utrecht", "Veenendaal",
  "Venlo", "Zwolle"
)

# Rank households by wealth within their FUA and clean the grid-cell codes.
# Percentiles are assigned BEFORE households without a grid cell are dropped,
# so ranks refer to every household with known wealth in the FUA.
wealth2022 <- wealth2022 %>%
  # Order values according to their wealth within every FUA
  arrange(FUA, Wealth_per_capita) %>%
  # Wealth percentile (1-100) every household belongs to in its FUA
  group_by(FUA) %>%
  mutate(Wealth_per_capita_percentile = ntile(Wealth_per_capita, 100)) %>%
  # Drop the grouping so it does not leak into the steps below
  ungroup() %>%
  # Clean rows of missing data ("----------" marks an unknown grid cell)
  filter(VRLVIERKANT100M != "----------") %>%
  # Append "00" to the easting and the northing ("\\2" followed by "00"), i.e.
  # multiply both by 100 so the coordinates are expressed in meters.
  # NOTE(review): presumably the raw codes are in units of 100 m, as the
  # column name suggests - confirm.
  mutate(
    VRLVIERKANT100M = gsub("(E)(\\d+)(N)(\\d+)","\\1\\200\\3\\400", VRLVIERKANT100M)
  )

# Loop over each city (sigma = 4000m)
# For every FUA: count households per grid cell and wealth percentile, split
# the population at each percentile threshold (1-99), compute the spatial
# information theory index (SITI) of every split and save one CSV per city.
for (city in cities) {
  # Alias used for filtering and in the output file name
  City <- city

  # Households of the current FUA, reduced to grid cell and percentile.
  # ungroup() keeps a leftover FUA grouping from being re-added by select().
  df <- wealth2022 %>%
    ungroup() %>%
    filter(FUA == City) %>%
    select(VRLVIERKANT100M, Wealth_per_capita_percentile)

  # A FUA without any household would break the column arithmetic below
  if (nrow(df) == 0) {
    warning("No households found for FUA ", City, " in 2022; skipping it", call. = FALSE)
    next
  }

  # Pivot table: one row per grid cell, one column per percentile
  pivot <- df %>%
    count(VRLVIERKANT100M, Wealth_per_capita_percentile, name = "Population") %>%
    tidyr::pivot_wider(names_from = Wealth_per_capita_percentile, values_from = Population, values_fill = 0)

  # Parse the coordinates out of the grid cell identifier
  coords <- cbind(
    X = as.numeric(sub("E", "", str_extract(pivot$VRLVIERKANT100M, "E\\d+"))),
    Y = as.numeric(sub("N", "", str_extract(pivot$VRLVIERKANT100M, "N\\d+")))
  )

  # Create SpatialPoints object
  Spatial_grid_cells_sp <- SpatialPoints(coords, proj4string = CRS("+init=epsg:28992"))

  # Delete the superfluous identifier column from the percentile counts
  pivot$VRLVIERKANT100M <- NULL

  # Percentiles that do not occur in this FUA get an all-zero column, so that
  # column i always stands for percentile i and thresholds 1-99 always exist
  for (p in setdiff(as.character(1:100), colnames(pivot))) {
    pivot[[p]] <- 0L
  }

  # Order the columns by percentile number
  pivot <- pivot[, order(as.numeric(colnames(pivot)))]
  counts <- as.matrix(pivot)
  n_cols <- ncol(counts)

  # One two-group database per threshold: households at or below percentile i
  # versus households above it
  indices <- seq(1, 99)
  list_of_databases <- lapply(indices, function(i) {
    data.frame(
      sumUpToCurrentPerc = rowSums(counts[, seq_len(i), drop = FALSE], na.rm = TRUE),
      sumAboveCurrentPerc = rowSums(counts[, (i + 1):n_cols, drop = FALSE], na.rm = TRUE)
    )
  })

  # Calculate SITI for every threshold; the results stay in a named list
  # instead of being spread over global variables with assign()/get()
  SITI_results <- lapply(list_of_databases, function(groups) {
    # A split with an empty side has no segregation to measure
    if (any(colSums(groups) == 0)) {
      return(NULL)
    }
    spseg(x = Spatial_grid_cells_sp, data = groups, method = "information", smoothing = "kernel", sigma = 4000, useC = FALSE)
  })
  names(SITI_results) <- paste0("SITI_result", indices)

  # Extract the index (slot h) of every result and prepare it for saving;
  # thresholds that could not be evaluated are stored as NA
  combined_data_list <- Map(function(result, i) {
    data.frame(Value = if (is.null(result)) NA_real_ else result@h, Group = as.character(i))
  }, SITI_results, indices)

  # Make them be in a single database
  combined_data <- do.call(rbind, combined_data_list)

  # Convert the Group column to a factor with custom levels
  combined_data$Group <- factor(combined_data$Group, levels = as.character(indices))

  # Save the data
  write.csv(combined_data, file = paste0("Wealth_per_capita_segregation_4000m_2022_", City, ".csv"), row.names = FALSE)
}

# Delete data for enabling further operations
rm(wealth2022)




### Calculate Spatial Information Theory Index for wealth segregation in 2021 ###

# Load the data
wealth2021 <- read.csv("raw_data/database_2021.csv") # only if not already loaded

# Delete missing values of wealth (coded as 99999999999). filter() also drops
# rows whose wealth is NA; base logical subsetting would have turned those
# into rows consisting only of NA values.
wealth2021 <- wealth2021 %>%
  filter(VEHW1000VERH != 99999999999)

# Calculate wealth per capita in each household
wealth2021$Wealth_per_capita <- wealth2021$VEHW1000VERH / wealth2021$INHAHL

# Define all possible FUAs
cities <- c(
  "'s-Gravenhage", "'s-Hertogenbosch", "Alkmaar", "Almelo",
  "Alphen aan den Rijn", "Amersfoort", "Amsterdam", "Apeldoorn", "Arnhem",
  "Assen", "Bergen op Zoom", "Breda", "Deventer", "Ede", "Eindhoven",
  "Enschede", "Gouda", "Groningen", "Heerlen", "Leeuwarden", "Leiden",
  "Lelystad", "Maastricht", "Middelburg", "Nijmegen", "Oss", "Roosendaal",
  "Rotterdam", "Sittard-Geleen", "Soest", "Tilburg", "Utrecht", "Veenendaal",
  "Venlo", "Zwolle"
)

# Order households by wealth within every FUA and assign each one the wealth
# percentile (1-100) it belongs to in its FUA. The grouping is removed again
# so that later steps do not silently operate per FUA.
wealth2021 <- wealth2021 %>%
  arrange(FUA, Wealth_per_capita) %>%
  group_by(FUA) %>%
  mutate(Wealth_per_capita_percentile = ntile(Wealth_per_capita, 100)) %>%
  ungroup()

# Drop households without a grid cell, then rescale the cell identifier:
# "\\2" and "\\4" are the E and N digit groups and the literal "00" appended to
# each multiplies both numbers by 100 (R back-references are single-digit, so
# "\\200" reads as group 2 followed by "00").
wealth2021 <- wealth2021 %>%
  filter(VRLVIERKANT100M != "----------") %>%
  mutate(VRLVIERKANT100M = gsub("(E)(\\d+)(N)(\\d+)","\\1\\200\\3\\400", VRLVIERKANT100M))

# Loop over each city (sigma = 4000m)
# For every FUA: count households per grid cell and wealth percentile, split
# the population at each percentile threshold (1-99), compute the spatial
# information theory index (SITI) of every split and save one CSV per city.
for (city in cities) {
  # Alias used for filtering and in the output file name
  City <- city

  # Households of the current FUA, reduced to grid cell and percentile.
  # ungroup() keeps a leftover FUA grouping from being re-added by select().
  df <- wealth2021 %>%
    ungroup() %>%
    filter(FUA == City) %>%
    select(VRLVIERKANT100M, Wealth_per_capita_percentile)

  # A FUA without any household would break the column arithmetic below
  if (nrow(df) == 0) {
    warning("No households found for FUA ", City, " in 2021; skipping it", call. = FALSE)
    next
  }

  # Pivot table: one row per grid cell, one column per percentile
  pivot <- df %>%
    count(VRLVIERKANT100M, Wealth_per_capita_percentile, name = "Population") %>%
    tidyr::pivot_wider(names_from = Wealth_per_capita_percentile, values_from = Population, values_fill = 0)

  # Parse the coordinates out of the grid cell identifier
  coords <- cbind(
    X = as.numeric(sub("E", "", str_extract(pivot$VRLVIERKANT100M, "E\\d+"))),
    Y = as.numeric(sub("N", "", str_extract(pivot$VRLVIERKANT100M, "N\\d+")))
  )

  # Create SpatialPoints object
  Spatial_grid_cells_sp <- SpatialPoints(coords, proj4string = CRS("+init=epsg:28992"))

  # Delete the superfluous identifier column from the percentile counts
  pivot$VRLVIERKANT100M <- NULL

  # Percentiles that do not occur in this FUA get an all-zero column, so that
  # column i always stands for percentile i and thresholds 1-99 always exist
  for (p in setdiff(as.character(1:100), colnames(pivot))) {
    pivot[[p]] <- 0L
  }

  # Order the columns by percentile number
  pivot <- pivot[, order(as.numeric(colnames(pivot)))]
  counts <- as.matrix(pivot)
  n_cols <- ncol(counts)

  # One two-group database per threshold: households at or below percentile i
  # versus households above it
  indices <- seq(1, 99)
  list_of_databases <- lapply(indices, function(i) {
    data.frame(
      sumUpToCurrentPerc = rowSums(counts[, seq_len(i), drop = FALSE], na.rm = TRUE),
      sumAboveCurrentPerc = rowSums(counts[, (i + 1):n_cols, drop = FALSE], na.rm = TRUE)
    )
  })

  # Calculate SITI for every threshold; the results stay in a named list
  # instead of being spread over global variables with assign()/get()
  SITI_results <- lapply(list_of_databases, function(groups) {
    # A split with an empty side has no segregation to measure
    if (any(colSums(groups) == 0)) {
      return(NULL)
    }
    spseg(x = Spatial_grid_cells_sp, data = groups, method = "information", smoothing = "kernel", sigma = 4000, useC = FALSE)
  })
  names(SITI_results) <- paste0("SITI_result", indices)

  # Extract the index (slot h) of every result and prepare it for saving;
  # thresholds that could not be evaluated are stored as NA
  combined_data_list <- Map(function(result, i) {
    data.frame(Value = if (is.null(result)) NA_real_ else result@h, Group = as.character(i))
  }, SITI_results, indices)

  # Make them be in a single database
  combined_data <- do.call(rbind, combined_data_list)

  # Convert the Group column to a factor with custom levels
  combined_data$Group <- factor(combined_data$Group, levels = as.character(indices))

  # Save the data
  write.csv(combined_data, file = paste0("Wealth_per_capita_segregation_4000m_2021_", City, ".csv"), row.names = FALSE)
}

# Delete data for enabling further operations
rm(wealth2021)




### Calculate Spatial Information Theory Index for wealth segregation in 2020 ###

# Load the data
wealth2020 <- read.csv("raw_data/database_2020.csv") # only if not already loaded

# Delete missing values of wealth (coded as 99999999999). filter() also drops
# rows whose wealth is NA; base logical subsetting would have turned those
# into rows consisting only of NA values.
wealth2020 <- wealth2020 %>%
  filter(VEHW1000VERH != 99999999999)

# Calculate wealth per capita in each household
wealth2020$Wealth_per_capita <- wealth2020$VEHW1000VERH / wealth2020$INHAHL

# Define all possible FUAs
cities <- c(
  "'s-Gravenhage", "'s-Hertogenbosch", "Alkmaar", "Almelo",
  "Alphen aan den Rijn", "Amersfoort", "Amsterdam", "Apeldoorn", "Arnhem",
  "Assen", "Bergen op Zoom", "Breda", "Deventer", "Ede", "Eindhoven",
  "Enschede", "Gouda", "Groningen", "Heerlen", "Leeuwarden", "Leiden",
  "Lelystad", "Maastricht", "Middelburg", "Nijmegen", "Oss", "Roosendaal",
  "Rotterdam", "Sittard-Geleen", "Soest", "Tilburg", "Utrecht", "Veenendaal",
  "Venlo", "Zwolle"
)

# Order households by wealth within every FUA and assign each one the wealth
# percentile (1-100) it belongs to in its FUA. The grouping is removed again
# so that later steps do not silently operate per FUA.
wealth2020 <- wealth2020 %>%
  arrange(FUA, Wealth_per_capita) %>%
  group_by(FUA) %>%
  mutate(Wealth_per_capita_percentile = ntile(Wealth_per_capita, 100)) %>%
  ungroup()

# Drop households without a grid cell, then rescale the cell identifier:
# "\\2" and "\\4" are the E and N digit groups and the literal "00" appended to
# each multiplies both numbers by 100 (R back-references are single-digit, so
# "\\200" reads as group 2 followed by "00").
wealth2020 <- wealth2020 %>%
  filter(VRLVIERKANT100M != "----------") %>%
  mutate(VRLVIERKANT100M = gsub("(E)(\\d+)(N)(\\d+)","\\1\\200\\3\\400", VRLVIERKANT100M))

# Loop over each city (sigma = 4000m)
# For every FUA: count households per grid cell and wealth percentile, split
# the population at each percentile threshold (1-99), compute the spatial
# information theory index (SITI) of every split and save one CSV per city.
for (city in cities) {
  # Alias used for filtering and in the output file name
  City <- city

  # Households of the current FUA, reduced to grid cell and percentile.
  # ungroup() keeps a leftover FUA grouping from being re-added by select().
  df <- wealth2020 %>%
    ungroup() %>%
    filter(FUA == City) %>%
    select(VRLVIERKANT100M, Wealth_per_capita_percentile)

  # A FUA without any household would break the column arithmetic below
  if (nrow(df) == 0) {
    warning("No households found for FUA ", City, " in 2020; skipping it", call. = FALSE)
    next
  }

  # Pivot table: one row per grid cell, one column per percentile
  pivot <- df %>%
    count(VRLVIERKANT100M, Wealth_per_capita_percentile, name = "Population") %>%
    tidyr::pivot_wider(names_from = Wealth_per_capita_percentile, values_from = Population, values_fill = 0)

  # Parse the coordinates out of the grid cell identifier
  coords <- cbind(
    X = as.numeric(sub("E", "", str_extract(pivot$VRLVIERKANT100M, "E\\d+"))),
    Y = as.numeric(sub("N", "", str_extract(pivot$VRLVIERKANT100M, "N\\d+")))
  )

  # Create SpatialPoints object
  Spatial_grid_cells_sp <- SpatialPoints(coords, proj4string = CRS("+init=epsg:28992"))

  # Delete the superfluous identifier column from the percentile counts
  pivot$VRLVIERKANT100M <- NULL

  # Percentiles that do not occur in this FUA get an all-zero column, so that
  # column i always stands for percentile i and thresholds 1-99 always exist
  for (p in setdiff(as.character(1:100), colnames(pivot))) {
    pivot[[p]] <- 0L
  }

  # Order the columns by percentile number
  pivot <- pivot[, order(as.numeric(colnames(pivot)))]
  counts <- as.matrix(pivot)
  n_cols <- ncol(counts)

  # One two-group database per threshold: households at or below percentile i
  # versus households above it
  indices <- seq(1, 99)
  list_of_databases <- lapply(indices, function(i) {
    data.frame(
      sumUpToCurrentPerc = rowSums(counts[, seq_len(i), drop = FALSE], na.rm = TRUE),
      sumAboveCurrentPerc = rowSums(counts[, (i + 1):n_cols, drop = FALSE], na.rm = TRUE)
    )
  })

  # Calculate SITI for every threshold; the results stay in a named list
  # instead of being spread over global variables with assign()/get()
  SITI_results <- lapply(list_of_databases, function(groups) {
    # A split with an empty side has no segregation to measure
    if (any(colSums(groups) == 0)) {
      return(NULL)
    }
    spseg(x = Spatial_grid_cells_sp, data = groups, method = "information", smoothing = "kernel", sigma = 4000, useC = FALSE)
  })
  names(SITI_results) <- paste0("SITI_result", indices)

  # Extract the index (slot h) of every result and prepare it for saving;
  # thresholds that could not be evaluated are stored as NA
  combined_data_list <- Map(function(result, i) {
    data.frame(Value = if (is.null(result)) NA_real_ else result@h, Group = as.character(i))
  }, SITI_results, indices)

  # Make them be in a single database
  combined_data <- do.call(rbind, combined_data_list)

  # Convert the Group column to a factor with custom levels
  combined_data$Group <- factor(combined_data$Group, levels = as.character(indices))

  # Save the data
  write.csv(combined_data, file = paste0("Wealth_per_capita_segregation_4000m_2020_", City, ".csv"), row.names = FALSE)
}

# Delete data for enabling further operations
rm(wealth2020)


### Calculate Spatial Information Theory Index for wealth segregation in 2019 ###

# Load the data
wealth2019 <- read.csv("raw_data/database_2019.csv") # only if not already loaded

# Delete missing values of wealth (coded as 99999999999). filter() also drops
# rows whose wealth is NA; base logical subsetting would have turned those
# into rows consisting only of NA values.
wealth2019 <- wealth2019 %>%
  filter(VEHW1000VERH != 99999999999)

# Calculate wealth per capita in each household
wealth2019$Wealth_per_capita <- wealth2019$VEHW1000VERH / wealth2019$INHAHL

# Define all possible FUAs
cities <- c(
  "'s-Gravenhage", "'s-Hertogenbosch", "Alkmaar", "Almelo",
  "Alphen aan den Rijn", "Amersfoort", "Amsterdam", "Apeldoorn", "Arnhem",
  "Assen", "Bergen op Zoom", "Breda", "Deventer", "Ede", "Eindhoven",
  "Enschede", "Gouda", "Groningen", "Heerlen", "Leeuwarden", "Leiden",
  "Lelystad", "Maastricht", "Middelburg", "Nijmegen", "Oss", "Roosendaal",
  "Rotterdam", "Sittard-Geleen", "Soest", "Tilburg", "Utrecht", "Veenendaal",
  "Venlo", "Zwolle"
)

# Order households by wealth within every FUA and assign each one the wealth
# percentile (1-100) it belongs to in its FUA. The grouping is removed again
# so that later steps do not silently operate per FUA.
wealth2019 <- wealth2019 %>%
  arrange(FUA, Wealth_per_capita) %>%
  group_by(FUA) %>%
  mutate(Wealth_per_capita_percentile = ntile(Wealth_per_capita, 100)) %>%
  ungroup()

# Drop households without a grid cell, then rescale the cell identifier:
# "\\2" and "\\4" are the E and N digit groups and the literal "00" appended to
# each multiplies both numbers by 100 (R back-references are single-digit, so
# "\\200" reads as group 2 followed by "00").
wealth2019 <- wealth2019 %>%
  filter(VRLVIERKANT100M != "----------") %>%
  mutate(VRLVIERKANT100M = gsub("(E)(\\d+)(N)(\\d+)","\\1\\200\\3\\400", VRLVIERKANT100M))

# Loop over each city (sigma = 4000m)
# For every FUA: count households per grid cell and wealth percentile, split
# the population at each percentile threshold (1-99), compute the spatial
# information theory index (SITI) of every split and save one CSV per city.
for (city in cities) {
  # Alias used for filtering and in the output file name
  City <- city

  # Households of the current FUA, reduced to grid cell and percentile.
  # ungroup() keeps a leftover FUA grouping from being re-added by select().
  df <- wealth2019 %>%
    ungroup() %>%
    filter(FUA == City) %>%
    select(VRLVIERKANT100M, Wealth_per_capita_percentile)

  # A FUA without any household would break the column arithmetic below
  if (nrow(df) == 0) {
    warning("No households found for FUA ", City, " in 2019; skipping it", call. = FALSE)
    next
  }

  # Pivot table: one row per grid cell, one column per percentile
  pivot <- df %>%
    count(VRLVIERKANT100M, Wealth_per_capita_percentile, name = "Population") %>%
    tidyr::pivot_wider(names_from = Wealth_per_capita_percentile, values_from = Population, values_fill = 0)

  # Parse the coordinates out of the grid cell identifier
  coords <- cbind(
    X = as.numeric(sub("E", "", str_extract(pivot$VRLVIERKANT100M, "E\\d+"))),
    Y = as.numeric(sub("N", "", str_extract(pivot$VRLVIERKANT100M, "N\\d+")))
  )

  # Create SpatialPoints object
  Spatial_grid_cells_sp <- SpatialPoints(coords, proj4string = CRS("+init=epsg:28992"))

  # Delete the superfluous identifier column from the percentile counts
  pivot$VRLVIERKANT100M <- NULL

  # Percentiles that do not occur in this FUA get an all-zero column, so that
  # column i always stands for percentile i and thresholds 1-99 always exist
  for (p in setdiff(as.character(1:100), colnames(pivot))) {
    pivot[[p]] <- 0L
  }

  # Order the columns by percentile number
  pivot <- pivot[, order(as.numeric(colnames(pivot)))]
  counts <- as.matrix(pivot)
  n_cols <- ncol(counts)

  # One two-group database per threshold: households at or below percentile i
  # versus households above it
  indices <- seq(1, 99)
  list_of_databases <- lapply(indices, function(i) {
    data.frame(
      sumUpToCurrentPerc = rowSums(counts[, seq_len(i), drop = FALSE], na.rm = TRUE),
      sumAboveCurrentPerc = rowSums(counts[, (i + 1):n_cols, drop = FALSE], na.rm = TRUE)
    )
  })

  # Calculate SITI for every threshold; the results stay in a named list
  # instead of being spread over global variables with assign()/get()
  SITI_results <- lapply(list_of_databases, function(groups) {
    # A split with an empty side has no segregation to measure
    if (any(colSums(groups) == 0)) {
      return(NULL)
    }
    spseg(x = Spatial_grid_cells_sp, data = groups, method = "information", smoothing = "kernel", sigma = 4000, useC = FALSE)
  })
  names(SITI_results) <- paste0("SITI_result", indices)

  # Extract the index (slot h) of every result and prepare it for saving;
  # thresholds that could not be evaluated are stored as NA
  combined_data_list <- Map(function(result, i) {
    data.frame(Value = if (is.null(result)) NA_real_ else result@h, Group = as.character(i))
  }, SITI_results, indices)

  # Make them be in a single database
  combined_data <- do.call(rbind, combined_data_list)

  # Convert the Group column to a factor with custom levels
  combined_data$Group <- factor(combined_data$Group, levels = as.character(indices))

  # Save the data
  write.csv(combined_data, file = paste0("Wealth_per_capita_segregation_4000m_2019_", City, ".csv"), row.names = FALSE)
}

# Delete data for enabling further operations
rm(wealth2019)





### Calculate Spatial Information Theory Index for wealth segregation in 2018 ###

# Load the data
wealth2018 <- read.csv("raw_data/database_2018.csv") # only if not already loaded

# Delete missing values of wealth (coded as 99999999999). filter() also drops
# rows whose wealth is NA; base logical subsetting would have turned those
# into rows consisting only of NA values.
wealth2018 <- wealth2018 %>%
  filter(VEHW1000VERH != 99999999999)

# Calculate wealth per capita in each household
wealth2018$Wealth_per_capita <- wealth2018$VEHW1000VERH / wealth2018$INHAHL

# Define all possible FUAs
cities <- c(
  "'s-Gravenhage", "'s-Hertogenbosch", "Alkmaar", "Almelo",
  "Alphen aan den Rijn", "Amersfoort", "Amsterdam", "Apeldoorn", "Arnhem",
  "Assen", "Bergen op Zoom", "Breda", "Deventer", "Ede", "Eindhoven",
  "Enschede", "Gouda", "Groningen", "Heerlen", "Leeuwarden", "Leiden",
  "Lelystad", "Maastricht", "Middelburg", "Nijmegen", "Oss", "Roosendaal",
  "Rotterdam", "Sittard-Geleen", "Soest", "Tilburg", "Utrecht", "Veenendaal",
  "Venlo", "Zwolle"
)

# Order households by wealth within every FUA and assign each one the wealth
# percentile (1-100) it belongs to in its FUA. The grouping is removed again
# so that later steps do not silently operate per FUA.
wealth2018 <- wealth2018 %>%
  arrange(FUA, Wealth_per_capita) %>%
  group_by(FUA) %>%
  mutate(Wealth_per_capita_percentile = ntile(Wealth_per_capita, 100)) %>%
  ungroup()

# Drop households without a grid cell, then rescale the cell identifier:
# "\\2" and "\\4" are the E and N digit groups and the literal "00" appended to
# each multiplies both numbers by 100 (R back-references are single-digit, so
# "\\200" reads as group 2 followed by "00").
wealth2018 <- wealth2018 %>%
  filter(VRLVIERKANT100M != "----------") %>%
  mutate(VRLVIERKANT100M = gsub("(E)(\\d+)(N)(\\d+)","\\1\\200\\3\\400", VRLVIERKANT100M))

# Loop over each city (sigma = 4000m)
# For every FUA: count households per grid cell and wealth percentile, split
# the population at each percentile threshold (1-99), compute the spatial
# information theory index (SITI) of every split and save one CSV per city.
for (city in cities) {
  # Alias used for filtering and in the output file name
  City <- city

  # Households of the current FUA, reduced to grid cell and percentile.
  # ungroup() keeps a leftover FUA grouping from being re-added by select().
  df <- wealth2018 %>%
    ungroup() %>%
    filter(FUA == City) %>%
    select(VRLVIERKANT100M, Wealth_per_capita_percentile)

  # A FUA without any household would break the column arithmetic below
  if (nrow(df) == 0) {
    warning("No households found for FUA ", City, " in 2018; skipping it", call. = FALSE)
    next
  }

  # Pivot table: one row per grid cell, one column per percentile
  pivot <- df %>%
    count(VRLVIERKANT100M, Wealth_per_capita_percentile, name = "Population") %>%
    tidyr::pivot_wider(names_from = Wealth_per_capita_percentile, values_from = Population, values_fill = 0)

  # Parse the coordinates out of the grid cell identifier
  coords <- cbind(
    X = as.numeric(sub("E", "", str_extract(pivot$VRLVIERKANT100M, "E\\d+"))),
    Y = as.numeric(sub("N", "", str_extract(pivot$VRLVIERKANT100M, "N\\d+")))
  )

  # Create SpatialPoints object
  Spatial_grid_cells_sp <- SpatialPoints(coords, proj4string = CRS("+init=epsg:28992"))

  # Delete the superfluous identifier column from the percentile counts
  pivot$VRLVIERKANT100M <- NULL

  # Percentiles that do not occur in this FUA get an all-zero column, so that
  # column i always stands for percentile i and thresholds 1-99 always exist
  for (p in setdiff(as.character(1:100), colnames(pivot))) {
    pivot[[p]] <- 0L
  }

  # Order the columns by percentile number
  pivot <- pivot[, order(as.numeric(colnames(pivot)))]
  counts <- as.matrix(pivot)
  n_cols <- ncol(counts)

  # One two-group database per threshold: households at or below percentile i
  # versus households above it
  indices <- seq(1, 99)
  list_of_databases <- lapply(indices, function(i) {
    data.frame(
      sumUpToCurrentPerc = rowSums(counts[, seq_len(i), drop = FALSE], na.rm = TRUE),
      sumAboveCurrentPerc = rowSums(counts[, (i + 1):n_cols, drop = FALSE], na.rm = TRUE)
    )
  })

  # Calculate SITI for every threshold; the results stay in a named list
  # instead of being spread over global variables with assign()/get()
  SITI_results <- lapply(list_of_databases, function(groups) {
    # A split with an empty side has no segregation to measure
    if (any(colSums(groups) == 0)) {
      return(NULL)
    }
    spseg(x = Spatial_grid_cells_sp, data = groups, method = "information", smoothing = "kernel", sigma = 4000, useC = FALSE)
  })
  names(SITI_results) <- paste0("SITI_result", indices)

  # Extract the index (slot h) of every result and prepare it for saving;
  # thresholds that could not be evaluated are stored as NA
  combined_data_list <- Map(function(result, i) {
    data.frame(Value = if (is.null(result)) NA_real_ else result@h, Group = as.character(i))
  }, SITI_results, indices)

  # Make them be in a single database
  combined_data <- do.call(rbind, combined_data_list)

  # Convert the Group column to a factor with custom levels
  combined_data$Group <- factor(combined_data$Group, levels = as.character(indices))

  # Save the data
  write.csv(combined_data, file = paste0("Wealth_per_capita_segregation_4000m_2018_", City, ".csv"), row.names = FALSE)
}

# Delete data for enabling further operations
rm(wealth2018)




### Calculate Spatial Information Theory Index for wealth segregation in 2017 ###

# Load the data
wealth2017 <- read.csv("raw_data/database_2017.csv") # only if not already loaded

# Delete missing values of wealth (coded as 99999999999). filter() also drops
# rows whose wealth is NA; base logical subsetting would have turned those
# into rows consisting only of NA values.
wealth2017 <- wealth2017 %>%
  filter(VEHW1000VERH != 99999999999)

# Calculate wealth per capita in each household
wealth2017$Wealth_per_capita <- wealth2017$VEHW1000VERH / wealth2017$INHAHL

# Define all possible FUAs
cities <- c(
  "'s-Gravenhage", "'s-Hertogenbosch", "Alkmaar", "Almelo",
  "Alphen aan den Rijn", "Amersfoort", "Amsterdam", "Apeldoorn", "Arnhem",
  "Assen", "Bergen op Zoom", "Breda", "Deventer", "Ede", "Eindhoven",
  "Enschede", "Gouda", "Groningen", "Heerlen", "Leeuwarden", "Leiden",
  "Lelystad", "Maastricht", "Middelburg", "Nijmegen", "Oss", "Roosendaal",
  "Rotterdam", "Sittard-Geleen", "Soest", "Tilburg", "Utrecht", "Veenendaal",
  "Venlo", "Zwolle"
)

# Order households by wealth within every FUA and assign each one the wealth
# percentile (1-100) it belongs to in its FUA. The grouping is removed again
# so that later steps do not silently operate per FUA.
wealth2017 <- wealth2017 %>%
  arrange(FUA, Wealth_per_capita) %>%
  group_by(FUA) %>%
  mutate(Wealth_per_capita_percentile = ntile(Wealth_per_capita, 100)) %>%
  ungroup()

# Drop households without a grid cell, then rescale the cell identifier:
# "\\2" and "\\4" are the E and N digit groups and the literal "00" appended to
# each multiplies both numbers by 100 (R back-references are single-digit, so
# "\\200" reads as group 2 followed by "00").
wealth2017 <- wealth2017 %>%
  filter(VRLVIERKANT100M != "----------") %>%
  mutate(VRLVIERKANT100M = gsub("(E)(\\d+)(N)(\\d+)","\\1\\200\\3\\400", VRLVIERKANT100M))

# Loop over each city (sigma = 4000m)
for (city in cities) {
  # Set the variable city for the given city
  City <- city
  
  # Re-start the original database
  df <- wealth2017
  
  # Create the dataframe out of the wealth database  for the FUA we are interested on
  df <- df %>%
    filter(FUA == City)
  
  # Keep only relevant columns
  df <- df %>%
    select(VRLVIERKANT100M, Wealth_per_capita_percentile)
  df$FUA <- NULL
  
  # create a pivot table
  pivot <- df %>%
    group_by(VRLVIERKANT100M, Wealth_per_capita_percentile) %>%
    summarise(Population = n()) %>%
    tidyr::pivot_wider(names_from = Wealth_per_capita_percentile, values_from = Population, values_fill = 0)
  
  # Create an element for only the coordinates of the grid cells
  grid_cells <- pivot %>%
    select(VRLVIERKANT100M)
  
  # Parse the coordinates
  grid_cells <- grid_cells %>%
    mutate(
      x = as.numeric(sub("E", "", str_extract(VRLVIERKANT100M, "E\\d+"))),
      y = as.numeric(sub("N", "", str_extract(VRLVIERKANT100M, "N\\d+"))),
    )
  
  # Delete superflous columns for coordinates
  grid_cells$VRLVIERKANT100M <- NULL
  
  # Create a spatial object for grid cells
  Spatial_grid_cells <- st_as_sf(grid_cells, coords = c("x", "y"), crs = 28992)
  coords <- st_coordinates(Spatial_grid_cells)
  
  # Create SpatialPoints object
  Spatial_grid_cells_sp <- SpatialPoints(coords, proj4string = CRS("+init=epsg:28992"))
  
  # Delete superflous columns for the income percentiles data
  pivot$VRLVIERKANT100M <- NULL
  
  # Order columns in the right order
  col_names <- as.numeric(colnames(pivot))
  
  # Re-order the dataframe
  pivot <- pivot[, order(col_names)]
  
  # Initialize a list to store each database for the income percentile pairwise calculation
  list_of_databases <- list()
  
  # Loop through each combination
  for(i in 1:(ncol(pivot)-1)) {
    # Calculate the cumulative sum of people up to the current percentile
    sumUpToCurrentPerc <- rowSums(pivot[, 1:i], na.rm = TRUE)
    
    # Calculate the sum of people above the current percentile
    sumAboveCurrentPerc <- rowSums(pivot[, (i+1):ncol(pivot)], na.rm = TRUE)
    
    # Create a new dataframe for the current percentile
    newData <- data.frame(sumUpToCurrentPerc = sumUpToCurrentPerc, sumAboveCurrentPerc = sumAboveCurrentPerc)
    
    # Store it in the list
    list_of_databases[[i]] <- newData
  }
  
  # Extract specific databases from it
  indices <- seq(1,99)
  for (i in indices) {
    database_name <- paste0("database_IncPer_", i)
    assign(database_name,list_of_databases[[i]])
  }
  
  # Initialize a list for storing SITI values
  SITI_results <- list()
  
  # Loop through the indices and calculate SITI results
  for (i in indices) {
    database_name <- paste0("database_IncPer_", i)
    SITI_results_name <- paste0("SITI_result", i)
    
    # Calculate SITI
    assign(SITI_results_name, spseg(x = Spatial_grid_cells_sp, data = get(database_name), method = "information", smoothing = "kernel", sigma = 4000, useC = FALSE))
    
    # Store the result in the list
    SITI_results[[SITI_results_name]] <- get(SITI_results_name)
    
  }
  
  # Transform the results into something readable
  for (i in indices) {
    SITI_result_name <- paste0("SITI_result", i)
    
    #Transform the SITI result
    assign(SITI_result_name, get(SITI_result_name)@h)
  }
  
  
  # Prepare them for being saved and plotted
  combined_data_list <- list()
  for (i in indices) {
    SITI_result_name <- paste0("SITI_result", i)
    df <- data.frame(Value = get(SITI_result_name), Group = as.character(i))
    combined_data_list[[SITI_result_name]] <- df
  }
  
  # Make them be in a single database
  combined_data <- do.call(rbind, combined_data_list)
  
  # Convert the Group column to a factor with custom levels
  combined_data$Group <- factor(combined_data$Group, levels = as.character(indices))
  
  # Save the data
  write.csv(combined_data, file = paste0("Wealth_per_capita_segregation_4000m_2017_", City, ".csv"), row.names = FALSE)
}

# Drop the 2017 household data from the workspace before the next year is loaded
rm(list = "wealth2017")




### Calculate Spatial Information Theory Index for wealth segregation in 2016 ###

# Load the household-level data for 2016
wealth2016 <- read.csv("raw_data/database_2016.csv") # only if not already loaded

# Drop households whose wealth carries the missing-value code 99999999999.
# filter() also drops rows where the comparison itself is NA, whereas logical
# `[` subsetting would turn those rows into all-NA rows.
wealth2016 <- wealth2016 %>%
  filter(VEHW1000VERH != 99999999999)

# Wealth per capita: household wealth (VEHW1000VERH) divided by INHAHL
# NOTE(review): INHAHL is presumably the number of household members - confirm
wealth2016$Wealth_per_capita <- wealth2016$VEHW1000VERH / wealth2016$INHAHL

# Define all possible FUAs
cities <- c(
  "'s-Gravenhage", "'s-Hertogenbosch", "Alkmaar", "Almelo", "Alphen aan den Rijn",
  "Amersfoort", "Amsterdam", "Apeldoorn", "Arnhem", "Assen",
  "Bergen op Zoom", "Breda", "Deventer", "Ede", "Eindhoven",
  "Enschede", "Gouda", "Groningen", "Heerlen", "Leeuwarden",
  "Leiden", "Lelystad", "Maastricht", "Middelburg", "Nijmegen",
  "Oss", "Roosendaal", "Rotterdam", "Sittard-Geleen", "Soest",
  "Tilburg", "Utrecht", "Veenendaal", "Venlo", "Zwolle"
)

wealth2016 <- wealth2016 %>%
  # Order households by wealth per capita within every FUA
  arrange(FUA, Wealth_per_capita) %>%
  # Rank every household into one of 100 wealth groups within its FUA; the
  # ranks are computed before the grid-cell filter below, as in the original
  group_by(FUA) %>%
  mutate(Wealth_per_capita_percentile = ntile(Wealth_per_capita, 100)) %>%
  # Remove the grouping again so it does not leak into the per-city loop
  ungroup() %>%
  # Drop households without a grid-cell code
  filter(VRLVIERKANT100M != "----------") %>%
  # Append "00" to both the E and the N number of the grid-cell code, i.e.
  # multiply both coordinates by 100
  # NOTE(review): presumably converts units of 100 m into metres - confirm
  mutate(VRLVIERKANT100M = gsub("(E)(\\d+)(N)(\\d+)","\\1\\200\\3\\400", VRLVIERKANT100M))

# Loop over each city (sigma = 4000m)
# For every FUA this counts households per grid cell and wealth percentile,
# splits the population at each percentile threshold 1..99 into "up to" and
# "above", computes the spatial information theory index (slot `h` of the
# spseg() result) for each two-group split and writes the 99 values to one CSV
# per city.
for (city in cities) {
  # Alias used in the filter and in the output file name
  City <- city

  # Households per grid cell and wealth percentile for this FUA.
  # ungroup() removes any grouping left on the input; households without a
  # percentile rank cannot be placed on either side of a threshold and are
  # excluded instead of ending up in a stray "NA" column.
  pivot <- wealth2016 %>%
    ungroup() %>%
    filter(FUA == City, !is.na(Wealth_per_capita_percentile)) %>%
    dplyr::count(VRLVIERKANT100M, Wealth_per_capita_percentile, name = "Population") %>%
    tidyr::pivot_wider(names_from = Wealth_per_capita_percentile, values_from = Population, values_fill = 0)

  # Skip FUAs without any usable household instead of failing further down
  if (nrow(pivot) == 0) {
    warning("No households found for ", City, " in 2016; skipping this city", call. = FALSE)
    next
  }

  # Parse the easting (E) and northing (N) of every grid cell from its code
  grid_cells <- data.frame(
    x = as.numeric(str_extract(pivot$VRLVIERKANT100M, "(?<=E)\\d+")),
    y = as.numeric(str_extract(pivot$VRLVIERKANT100M, "(?<=N)\\d+"))
  )

  # Build the SpatialPoints object (EPSG:28992) that is passed to spseg()
  Spatial_grid_cells <- st_as_sf(grid_cells, coords = c("x", "y"), crs = 28992)
  coords <- st_coordinates(Spatial_grid_cells)
  Spatial_grid_cells_sp <- SpatialPoints(coords, proj4string = CRS("+init=epsg:28992"))

  # Household counts as a cell x percentile matrix with one column for every
  # percentile 1..100, so that column i always is percentile i even when a
  # percentile has no household left in this FUA (its column is then all zero).
  percentile_cols <- as.character(1:100)
  present_cols <- intersect(percentile_cols, colnames(pivot))
  counts <- matrix(0, nrow = nrow(pivot), ncol = length(percentile_cols), dimnames = list(NULL, percentile_cols))
  counts[, present_cols] <- as.matrix(pivot[, present_cols])
  households_per_cell <- rowSums(counts)

  # One SITI value per percentile threshold
  indices <- 1:99
  combined_data_list <- lapply(indices, function(i) {
    # Households at or below percentile i versus households above it, per cell
    up_to <- rowSums(counts[, seq_len(i), drop = FALSE])
    groups <- data.frame(sumUpToCurrentPerc = up_to, sumAboveCurrentPerc = households_per_cell - up_to)

    # Calculate SITI for this two-group split
    siti <- spseg(x = Spatial_grid_cells_sp, data = groups, method = "information", smoothing = "kernel", sigma = 4000, useC = FALSE)

    data.frame(Value = siti@h, Group = as.character(i))
  })
  names(combined_data_list) <- paste0("SITI_result", indices)

  # Make them be in a single database
  combined_data <- do.call(rbind, combined_data_list)

  # Convert the Group column to a factor ordered by threshold
  combined_data$Group <- factor(combined_data$Group, levels = as.character(indices))

  # Save the data
  write.csv(combined_data, file = paste0("Wealth_per_capita_segregation_4000m_2016_", City, ".csv"), row.names = FALSE)
}

# Delete data for enabling further operations
rm(wealth2016)


### Calculate Spatial Information Theory Index for wealth segregation in 2015 ###

# Load the household-level data for 2015
wealth2015 <- read.csv("raw_data/database_2015.csv") # only if not already loaded

# Drop households whose wealth carries the missing-value code 99999999999.
# filter() also drops rows where the comparison itself is NA, whereas logical
# `[` subsetting would turn those rows into all-NA rows.
wealth2015 <- wealth2015 %>%
  filter(VEHW1000VERH != 99999999999)

# Wealth per capita: household wealth (VEHW1000VERH) divided by INHAHL
# NOTE(review): INHAHL is presumably the number of household members - confirm
wealth2015$Wealth_per_capita <- wealth2015$VEHW1000VERH / wealth2015$INHAHL

# Define all possible FUAs
cities <- c(
  "'s-Gravenhage", "'s-Hertogenbosch", "Alkmaar", "Almelo", "Alphen aan den Rijn",
  "Amersfoort", "Amsterdam", "Apeldoorn", "Arnhem", "Assen",
  "Bergen op Zoom", "Breda", "Deventer", "Ede", "Eindhoven",
  "Enschede", "Gouda", "Groningen", "Heerlen", "Leeuwarden",
  "Leiden", "Lelystad", "Maastricht", "Middelburg", "Nijmegen",
  "Oss", "Roosendaal", "Rotterdam", "Sittard-Geleen", "Soest",
  "Tilburg", "Utrecht", "Veenendaal", "Venlo", "Zwolle"
)

wealth2015 <- wealth2015 %>%
  # Order households by wealth per capita within every FUA
  arrange(FUA, Wealth_per_capita) %>%
  # Rank every household into one of 100 wealth groups within its FUA; the
  # ranks are computed before the grid-cell filter below, as in the original
  group_by(FUA) %>%
  mutate(Wealth_per_capita_percentile = ntile(Wealth_per_capita, 100)) %>%
  # Remove the grouping again so it does not leak into the per-city loop
  ungroup() %>%
  # Drop households without a grid-cell code
  filter(VRLVIERKANT100M != "----------") %>%
  # Append "00" to both the E and the N number of the grid-cell code, i.e.
  # multiply both coordinates by 100
  # NOTE(review): presumably converts units of 100 m into metres - confirm
  mutate(VRLVIERKANT100M = gsub("(E)(\\d+)(N)(\\d+)","\\1\\200\\3\\400", VRLVIERKANT100M))

# Loop over each city (sigma = 4000m)
# For every FUA this counts households per grid cell and wealth percentile,
# splits the population at each percentile threshold 1..99 into "up to" and
# "above", computes the spatial information theory index (slot `h` of the
# spseg() result) for each two-group split and writes the 99 values to one CSV
# per city.
for (city in cities) {
  # Alias used in the filter and in the output file name
  City <- city

  # Households per grid cell and wealth percentile for this FUA.
  # ungroup() removes any grouping left on the input; households without a
  # percentile rank cannot be placed on either side of a threshold and are
  # excluded instead of ending up in a stray "NA" column.
  pivot <- wealth2015 %>%
    ungroup() %>%
    filter(FUA == City, !is.na(Wealth_per_capita_percentile)) %>%
    dplyr::count(VRLVIERKANT100M, Wealth_per_capita_percentile, name = "Population") %>%
    tidyr::pivot_wider(names_from = Wealth_per_capita_percentile, values_from = Population, values_fill = 0)

  # Skip FUAs without any usable household instead of failing further down
  if (nrow(pivot) == 0) {
    warning("No households found for ", City, " in 2015; skipping this city", call. = FALSE)
    next
  }

  # Parse the easting (E) and northing (N) of every grid cell from its code
  grid_cells <- data.frame(
    x = as.numeric(str_extract(pivot$VRLVIERKANT100M, "(?<=E)\\d+")),
    y = as.numeric(str_extract(pivot$VRLVIERKANT100M, "(?<=N)\\d+"))
  )

  # Build the SpatialPoints object (EPSG:28992) that is passed to spseg()
  Spatial_grid_cells <- st_as_sf(grid_cells, coords = c("x", "y"), crs = 28992)
  coords <- st_coordinates(Spatial_grid_cells)
  Spatial_grid_cells_sp <- SpatialPoints(coords, proj4string = CRS("+init=epsg:28992"))

  # Household counts as a cell x percentile matrix with one column for every
  # percentile 1..100, so that column i always is percentile i even when a
  # percentile has no household left in this FUA (its column is then all zero).
  percentile_cols <- as.character(1:100)
  present_cols <- intersect(percentile_cols, colnames(pivot))
  counts <- matrix(0, nrow = nrow(pivot), ncol = length(percentile_cols), dimnames = list(NULL, percentile_cols))
  counts[, present_cols] <- as.matrix(pivot[, present_cols])
  households_per_cell <- rowSums(counts)

  # One SITI value per percentile threshold
  indices <- 1:99
  combined_data_list <- lapply(indices, function(i) {
    # Households at or below percentile i versus households above it, per cell
    up_to <- rowSums(counts[, seq_len(i), drop = FALSE])
    groups <- data.frame(sumUpToCurrentPerc = up_to, sumAboveCurrentPerc = households_per_cell - up_to)

    # Calculate SITI for this two-group split
    siti <- spseg(x = Spatial_grid_cells_sp, data = groups, method = "information", smoothing = "kernel", sigma = 4000, useC = FALSE)

    data.frame(Value = siti@h, Group = as.character(i))
  })
  names(combined_data_list) <- paste0("SITI_result", indices)

  # Make them be in a single database
  combined_data <- do.call(rbind, combined_data_list)

  # Convert the Group column to a factor ordered by threshold
  combined_data$Group <- factor(combined_data$Group, levels = as.character(indices))

  # Save the data
  write.csv(combined_data, file = paste0("Wealth_per_capita_segregation_4000m_2015_", City, ".csv"), row.names = FALSE)
}

# Delete data for enabling further operations
rm(wealth2015)



### Calculate Spatial Information Theory Index for wealth segregation in 2014 ###

# Load the household-level data for 2014
wealth2014 <- read.csv("raw_data/database_2014.csv") # only if not already loaded

# Drop households whose wealth carries the missing-value code 99999999999.
# filter() also drops rows where the comparison itself is NA, whereas logical
# `[` subsetting would turn those rows into all-NA rows.
wealth2014 <- wealth2014 %>%
  filter(VEHW1000VERH != 99999999999)

# Wealth per capita: household wealth (VEHW1000VERH) divided by INHAHL
# NOTE(review): INHAHL is presumably the number of household members - confirm
wealth2014$Wealth_per_capita <- wealth2014$VEHW1000VERH / wealth2014$INHAHL

# Define all possible FUAs
cities <- c(
  "'s-Gravenhage", "'s-Hertogenbosch", "Alkmaar", "Almelo", "Alphen aan den Rijn",
  "Amersfoort", "Amsterdam", "Apeldoorn", "Arnhem", "Assen",
  "Bergen op Zoom", "Breda", "Deventer", "Ede", "Eindhoven",
  "Enschede", "Gouda", "Groningen", "Heerlen", "Leeuwarden",
  "Leiden", "Lelystad", "Maastricht", "Middelburg", "Nijmegen",
  "Oss", "Roosendaal", "Rotterdam", "Sittard-Geleen", "Soest",
  "Tilburg", "Utrecht", "Veenendaal", "Venlo", "Zwolle"
)

wealth2014 <- wealth2014 %>%
  # Order households by wealth per capita within every FUA
  arrange(FUA, Wealth_per_capita) %>%
  # Rank every household into one of 100 wealth groups within its FUA; the
  # ranks are computed before the grid-cell filter below, as in the original
  group_by(FUA) %>%
  mutate(Wealth_per_capita_percentile = ntile(Wealth_per_capita, 100)) %>%
  # Remove the grouping again so it does not leak into the per-city loop
  ungroup() %>%
  # Drop households without a grid-cell code
  filter(VRLVIERKANT100M != "----------") %>%
  # Append "00" to both the E and the N number of the grid-cell code, i.e.
  # multiply both coordinates by 100
  # NOTE(review): presumably converts units of 100 m into metres - confirm
  mutate(VRLVIERKANT100M = gsub("(E)(\\d+)(N)(\\d+)","\\1\\200\\3\\400", VRLVIERKANT100M))

# Loop over each city (sigma = 4000m)
# For every FUA this counts households per grid cell and wealth percentile,
# splits the population at each percentile threshold 1..99 into "up to" and
# "above", computes the spatial information theory index (slot `h` of the
# spseg() result) for each two-group split and writes the 99 values to one CSV
# per city.
for (city in cities) {
  # Alias used in the filter and in the output file name
  City <- city

  # Households per grid cell and wealth percentile for this FUA.
  # ungroup() removes any grouping left on the input; households without a
  # percentile rank cannot be placed on either side of a threshold and are
  # excluded instead of ending up in a stray "NA" column.
  pivot <- wealth2014 %>%
    ungroup() %>%
    filter(FUA == City, !is.na(Wealth_per_capita_percentile)) %>%
    dplyr::count(VRLVIERKANT100M, Wealth_per_capita_percentile, name = "Population") %>%
    tidyr::pivot_wider(names_from = Wealth_per_capita_percentile, values_from = Population, values_fill = 0)

  # Skip FUAs without any usable household instead of failing further down
  if (nrow(pivot) == 0) {
    warning("No households found for ", City, " in 2014; skipping this city", call. = FALSE)
    next
  }

  # Parse the easting (E) and northing (N) of every grid cell from its code
  grid_cells <- data.frame(
    x = as.numeric(str_extract(pivot$VRLVIERKANT100M, "(?<=E)\\d+")),
    y = as.numeric(str_extract(pivot$VRLVIERKANT100M, "(?<=N)\\d+"))
  )

  # Build the SpatialPoints object (EPSG:28992) that is passed to spseg()
  Spatial_grid_cells <- st_as_sf(grid_cells, coords = c("x", "y"), crs = 28992)
  coords <- st_coordinates(Spatial_grid_cells)
  Spatial_grid_cells_sp <- SpatialPoints(coords, proj4string = CRS("+init=epsg:28992"))

  # Household counts as a cell x percentile matrix with one column for every
  # percentile 1..100, so that column i always is percentile i even when a
  # percentile has no household left in this FUA (its column is then all zero).
  percentile_cols <- as.character(1:100)
  present_cols <- intersect(percentile_cols, colnames(pivot))
  counts <- matrix(0, nrow = nrow(pivot), ncol = length(percentile_cols), dimnames = list(NULL, percentile_cols))
  counts[, present_cols] <- as.matrix(pivot[, present_cols])
  households_per_cell <- rowSums(counts)

  # One SITI value per percentile threshold
  indices <- 1:99
  combined_data_list <- lapply(indices, function(i) {
    # Households at or below percentile i versus households above it, per cell
    up_to <- rowSums(counts[, seq_len(i), drop = FALSE])
    groups <- data.frame(sumUpToCurrentPerc = up_to, sumAboveCurrentPerc = households_per_cell - up_to)

    # Calculate SITI for this two-group split
    siti <- spseg(x = Spatial_grid_cells_sp, data = groups, method = "information", smoothing = "kernel", sigma = 4000, useC = FALSE)

    data.frame(Value = siti@h, Group = as.character(i))
  })
  names(combined_data_list) <- paste0("SITI_result", indices)

  # Make them be in a single database
  combined_data <- do.call(rbind, combined_data_list)

  # Convert the Group column to a factor ordered by threshold
  combined_data$Group <- factor(combined_data$Group, levels = as.character(indices))

  # Save the data
  write.csv(combined_data, file = paste0("Wealth_per_capita_segregation_4000m_2014_", City, ".csv"), row.names = FALSE)
}

# Delete data for enabling further operations
rm(wealth2014)




### Calculate Spatial Information Theory Index for wealth segregation in 2013 ###

# Load the household-level data for 2013
wealth2013 <- read.csv("raw_data/database_2013.csv") # only if not already loaded

# Drop households whose wealth carries the missing-value code 99999999999.
# filter() also drops rows where the comparison itself is NA, whereas logical
# `[` subsetting would turn those rows into all-NA rows.
wealth2013 <- wealth2013 %>%
  filter(VEHW1000VERH != 99999999999)

# Wealth per capita: household wealth (VEHW1000VERH) divided by INHAHL
# NOTE(review): INHAHL is presumably the number of household members - confirm
wealth2013$Wealth_per_capita <- wealth2013$VEHW1000VERH / wealth2013$INHAHL

# Define all possible FUAs
cities <- c(
  "'s-Gravenhage", "'s-Hertogenbosch", "Alkmaar", "Almelo", "Alphen aan den Rijn",
  "Amersfoort", "Amsterdam", "Apeldoorn", "Arnhem", "Assen",
  "Bergen op Zoom", "Breda", "Deventer", "Ede", "Eindhoven",
  "Enschede", "Gouda", "Groningen", "Heerlen", "Leeuwarden",
  "Leiden", "Lelystad", "Maastricht", "Middelburg", "Nijmegen",
  "Oss", "Roosendaal", "Rotterdam", "Sittard-Geleen", "Soest",
  "Tilburg", "Utrecht", "Veenendaal", "Venlo", "Zwolle"
)

wealth2013 <- wealth2013 %>%
  # Order households by wealth per capita within every FUA
  arrange(FUA, Wealth_per_capita) %>%
  # Rank every household into one of 100 wealth groups within its FUA; the
  # ranks are computed before the grid-cell filter below, as in the original
  group_by(FUA) %>%
  mutate(Wealth_per_capita_percentile = ntile(Wealth_per_capita, 100)) %>%
  # Remove the grouping again so it does not leak into the per-city loop
  ungroup() %>%
  # Drop households without a grid-cell code
  filter(VRLVIERKANT100M != "----------") %>%
  # Append "00" to both the E and the N number of the grid-cell code, i.e.
  # multiply both coordinates by 100
  # NOTE(review): presumably converts units of 100 m into metres - confirm
  mutate(VRLVIERKANT100M = gsub("(E)(\\d+)(N)(\\d+)","\\1\\200\\3\\400", VRLVIERKANT100M))

# Loop over each city (sigma = 4000m)
# For every FUA this counts households per grid cell and wealth percentile,
# splits the population at each percentile threshold 1..99 into "up to" and
# "above", computes the spatial information theory index (slot `h` of the
# spseg() result) for each two-group split and writes the 99 values to one CSV
# per city.
for (city in cities) {
  # Alias used in the filter and in the output file name
  City <- city

  # Households per grid cell and wealth percentile for this FUA.
  # ungroup() removes any grouping left on the input; households without a
  # percentile rank cannot be placed on either side of a threshold and are
  # excluded instead of ending up in a stray "NA" column.
  pivot <- wealth2013 %>%
    ungroup() %>%
    filter(FUA == City, !is.na(Wealth_per_capita_percentile)) %>%
    dplyr::count(VRLVIERKANT100M, Wealth_per_capita_percentile, name = "Population") %>%
    tidyr::pivot_wider(names_from = Wealth_per_capita_percentile, values_from = Population, values_fill = 0)

  # Skip FUAs without any usable household instead of failing further down
  if (nrow(pivot) == 0) {
    warning("No households found for ", City, " in 2013; skipping this city", call. = FALSE)
    next
  }

  # Parse the easting (E) and northing (N) of every grid cell from its code
  grid_cells <- data.frame(
    x = as.numeric(str_extract(pivot$VRLVIERKANT100M, "(?<=E)\\d+")),
    y = as.numeric(str_extract(pivot$VRLVIERKANT100M, "(?<=N)\\d+"))
  )

  # Build the SpatialPoints object (EPSG:28992) that is passed to spseg()
  Spatial_grid_cells <- st_as_sf(grid_cells, coords = c("x", "y"), crs = 28992)
  coords <- st_coordinates(Spatial_grid_cells)
  Spatial_grid_cells_sp <- SpatialPoints(coords, proj4string = CRS("+init=epsg:28992"))

  # Household counts as a cell x percentile matrix with one column for every
  # percentile 1..100, so that column i always is percentile i even when a
  # percentile has no household left in this FUA (its column is then all zero).
  percentile_cols <- as.character(1:100)
  present_cols <- intersect(percentile_cols, colnames(pivot))
  counts <- matrix(0, nrow = nrow(pivot), ncol = length(percentile_cols), dimnames = list(NULL, percentile_cols))
  counts[, present_cols] <- as.matrix(pivot[, present_cols])
  households_per_cell <- rowSums(counts)

  # One SITI value per percentile threshold
  indices <- 1:99
  combined_data_list <- lapply(indices, function(i) {
    # Households at or below percentile i versus households above it, per cell
    up_to <- rowSums(counts[, seq_len(i), drop = FALSE])
    groups <- data.frame(sumUpToCurrentPerc = up_to, sumAboveCurrentPerc = households_per_cell - up_to)

    # Calculate SITI for this two-group split
    siti <- spseg(x = Spatial_grid_cells_sp, data = groups, method = "information", smoothing = "kernel", sigma = 4000, useC = FALSE)

    data.frame(Value = siti@h, Group = as.character(i))
  })
  names(combined_data_list) <- paste0("SITI_result", indices)

  # Make them be in a single database
  combined_data <- do.call(rbind, combined_data_list)

  # Convert the Group column to a factor ordered by threshold
  combined_data$Group <- factor(combined_data$Group, levels = as.character(indices))

  # Save the data
  write.csv(combined_data, file = paste0("Wealth_per_capita_segregation_4000m_2013_", City, ".csv"), row.names = FALSE)
}

# Delete data for enabling further operations
rm(wealth2013)








### Calculate Spatial Information Theory Index for wealth segregation in 2012 ###

# Load the household-level data for 2012
wealth2012 <- read.csv("raw_data/database_2012.csv") # only if not already loaded

# Drop households whose wealth carries the missing-value code 99999999999.
# filter() also drops rows where the comparison itself is NA, whereas logical
# `[` subsetting would turn those rows into all-NA rows.
wealth2012 <- wealth2012 %>%
  filter(VEHW1000VERH != 99999999999)

# Wealth per capita: household wealth (VEHW1000VERH) divided by INHAHL
# NOTE(review): INHAHL is presumably the number of household members - confirm
wealth2012$Wealth_per_capita <- wealth2012$VEHW1000VERH / wealth2012$INHAHL

# Define all possible FUAs
cities <- c(
  "'s-Gravenhage", "'s-Hertogenbosch", "Alkmaar", "Almelo", "Alphen aan den Rijn",
  "Amersfoort", "Amsterdam", "Apeldoorn", "Arnhem", "Assen",
  "Bergen op Zoom", "Breda", "Deventer", "Ede", "Eindhoven",
  "Enschede", "Gouda", "Groningen", "Heerlen", "Leeuwarden",
  "Leiden", "Lelystad", "Maastricht", "Middelburg", "Nijmegen",
  "Oss", "Roosendaal", "Rotterdam", "Sittard-Geleen", "Soest",
  "Tilburg", "Utrecht", "Veenendaal", "Venlo", "Zwolle"
)

wealth2012 <- wealth2012 %>%
  # Order households by wealth per capita within every FUA
  arrange(FUA, Wealth_per_capita) %>%
  # Rank every household into one of 100 wealth groups within its FUA; the
  # ranks are computed before the grid-cell filter below, as in the original
  group_by(FUA) %>%
  mutate(Wealth_per_capita_percentile = ntile(Wealth_per_capita, 100)) %>%
  # Remove the grouping again so it does not leak into the per-city loop
  ungroup() %>%
  # Drop households without a grid-cell code
  filter(VRLVIERKANT100M != "----------") %>%
  # Append "00" to both the E and the N number of the grid-cell code, i.e.
  # multiply both coordinates by 100
  # NOTE(review): presumably converts units of 100 m into metres - confirm
  mutate(VRLVIERKANT100M = gsub("(E)(\\d+)(N)(\\d+)","\\1\\200\\3\\400", VRLVIERKANT100M))

# Loop over each city (sigma = 4000m)
# For every FUA this counts households per grid cell and wealth percentile,
# splits the population at each percentile threshold 1..99 into "up to" and
# "above", computes the spatial information theory index (slot `h` of the
# spseg() result) for each two-group split and writes the 99 values to one CSV
# per city.
for (city in cities) {
  # Alias used in the filter and in the output file name
  City <- city

  # Households per grid cell and wealth percentile for this FUA.
  # ungroup() removes any grouping left on the input; households without a
  # percentile rank cannot be placed on either side of a threshold and are
  # excluded instead of ending up in a stray "NA" column.
  pivot <- wealth2012 %>%
    ungroup() %>%
    filter(FUA == City, !is.na(Wealth_per_capita_percentile)) %>%
    dplyr::count(VRLVIERKANT100M, Wealth_per_capita_percentile, name = "Population") %>%
    tidyr::pivot_wider(names_from = Wealth_per_capita_percentile, values_from = Population, values_fill = 0)

  # Skip FUAs without any usable household instead of failing further down
  if (nrow(pivot) == 0) {
    warning("No households found for ", City, " in 2012; skipping this city", call. = FALSE)
    next
  }

  # Parse the easting (E) and northing (N) of every grid cell from its code
  grid_cells <- data.frame(
    x = as.numeric(str_extract(pivot$VRLVIERKANT100M, "(?<=E)\\d+")),
    y = as.numeric(str_extract(pivot$VRLVIERKANT100M, "(?<=N)\\d+"))
  )

  # Build the SpatialPoints object (EPSG:28992) that is passed to spseg()
  Spatial_grid_cells <- st_as_sf(grid_cells, coords = c("x", "y"), crs = 28992)
  coords <- st_coordinates(Spatial_grid_cells)
  Spatial_grid_cells_sp <- SpatialPoints(coords, proj4string = CRS("+init=epsg:28992"))

  # Household counts as a cell x percentile matrix with one column for every
  # percentile 1..100, so that column i always is percentile i even when a
  # percentile has no household left in this FUA (its column is then all zero).
  percentile_cols <- as.character(1:100)
  present_cols <- intersect(percentile_cols, colnames(pivot))
  counts <- matrix(0, nrow = nrow(pivot), ncol = length(percentile_cols), dimnames = list(NULL, percentile_cols))
  counts[, present_cols] <- as.matrix(pivot[, present_cols])
  households_per_cell <- rowSums(counts)

  # One SITI value per percentile threshold
  indices <- 1:99
  combined_data_list <- lapply(indices, function(i) {
    # Households at or below percentile i versus households above it, per cell
    up_to <- rowSums(counts[, seq_len(i), drop = FALSE])
    groups <- data.frame(sumUpToCurrentPerc = up_to, sumAboveCurrentPerc = households_per_cell - up_to)

    # Calculate SITI for this two-group split
    siti <- spseg(x = Spatial_grid_cells_sp, data = groups, method = "information", smoothing = "kernel", sigma = 4000, useC = FALSE)

    data.frame(Value = siti@h, Group = as.character(i))
  })
  names(combined_data_list) <- paste0("SITI_result", indices)

  # Make them be in a single database
  combined_data <- do.call(rbind, combined_data_list)

  # Convert the Group column to a factor ordered by threshold
  combined_data$Group <- factor(combined_data$Group, levels = as.character(indices))

  # Save the data
  write.csv(combined_data, file = paste0("Wealth_per_capita_segregation_4000m_2012_", City, ".csv"), row.names = FALSE)
}

# Delete data for enabling further operations
rm(wealth2012)


### Calculate Spatial Information Theory Index for wealth segregation in 2011 ###

# Load the household-level data for 2011
wealth2011 <- read.csv("raw_data/database_2011.csv") # only if not already loaded

# Drop households whose wealth carries the missing-value code 99999999999.
# filter() also drops rows where the comparison itself is NA, whereas logical
# `[` subsetting would turn those rows into all-NA rows.
wealth2011 <- wealth2011 %>%
  filter(VEHW1000VERH != 99999999999)

# Wealth per capita: household wealth (VEHW1000VERH) divided by INHAHL
# NOTE(review): INHAHL is presumably the number of household members - confirm
wealth2011$Wealth_per_capita <- wealth2011$VEHW1000VERH / wealth2011$INHAHL

# Define all possible FUAs
cities <- c(
  "'s-Gravenhage", "'s-Hertogenbosch", "Alkmaar", "Almelo", "Alphen aan den Rijn",
  "Amersfoort", "Amsterdam", "Apeldoorn", "Arnhem", "Assen",
  "Bergen op Zoom", "Breda", "Deventer", "Ede", "Eindhoven",
  "Enschede", "Gouda", "Groningen", "Heerlen", "Leeuwarden",
  "Leiden", "Lelystad", "Maastricht", "Middelburg", "Nijmegen",
  "Oss", "Roosendaal", "Rotterdam", "Sittard-Geleen", "Soest",
  "Tilburg", "Utrecht", "Veenendaal", "Venlo", "Zwolle"
)

wealth2011 <- wealth2011 %>%
  # Order households by wealth per capita within every FUA
  arrange(FUA, Wealth_per_capita) %>%
  # Rank every household into one of 100 wealth groups within its FUA; the
  # ranks are computed before the grid-cell filter below, as in the original
  group_by(FUA) %>%
  mutate(Wealth_per_capita_percentile = ntile(Wealth_per_capita, 100)) %>%
  # Remove the grouping again so it does not leak into the per-city loop
  ungroup() %>%
  # Drop households without a grid-cell code
  filter(VRLVIERKANT100M != "----------") %>%
  # Append "00" to both the E and the N number of the grid-cell code, i.e.
  # multiply both coordinates by 100
  # NOTE(review): presumably converts units of 100 m into metres - confirm
  mutate(VRLVIERKANT100M = gsub("(E)(\\d+)(N)(\\d+)","\\1\\200\\3\\400", VRLVIERKANT100M))

# Loop over each city (sigma = 4000m)
#
# For every FUA: count households per grid cell and wealth percentile, split
# the population at each percentile threshold 1..99 into "at or below" and
# "above", compute the spatial information theory index (slot `h` of the
# spseg() result) for every split, and write the 99 values to one CSV file.
for (city in cities) {
  # Set the variable city for the given city
  City <- city

  # Households of the FUA we are interested in. ungroup() keeps select() from
  # silently re-adding a FUA grouping column; households without a percentile
  # cannot be placed on either side of a threshold, so they are left out
  # (otherwise they would end up in an "NA" column counted as "above").
  df <- wealth2011 %>%
    ungroup() %>%
    filter(FUA == City, !is.na(Wealth_per_capita_percentile)) %>%
    select(VRLVIERKANT100M, Wealth_per_capita_percentile)

  # Skip (loudly) a FUA without usable households instead of failing further
  # down with an obscure subsetting error
  if (nrow(df) == 0) {
    warning(
      "No households found for FUA '", City,
      "' in 2011; no output file written.",
      call. = FALSE
    )
    next
  }

  # create a pivot table: one row per grid cell, one column per percentile
  pivot <- df %>%
    group_by(VRLVIERKANT100M, Wealth_per_capita_percentile) %>%
    summarise(Population = n(), .groups = "drop") %>%
    tidyr::pivot_wider(
      names_from = Wealth_per_capita_percentile,
      values_from = Population,
      values_fill = 0
    )

  # Parse the coordinates of the grid cells (same row order as `pivot`)
  grid_cells <- data.frame(
    x = as.numeric(sub("E", "", str_extract(pivot$VRLVIERKANT100M, "E\\d+"))),
    y = as.numeric(sub("N", "", str_extract(pivot$VRLVIERKANT100M, "N\\d+")))
  )

  # Create a spatial object for grid cells
  Spatial_grid_cells <- st_as_sf(grid_cells, coords = c("x", "y"), crs = 28992)
  coords <- st_coordinates(Spatial_grid_cells)

  # Create SpatialPoints object
  Spatial_grid_cells_sp <- SpatialPoints(
    coords,
    proj4string = CRS("+init=epsg:28992")
  )

  # Keep only the percentile columns, in ascending percentile order
  pivot$VRLVIERKANT100M <- NULL
  col_names <- as.numeric(colnames(pivot))
  pivot <- pivot[, order(col_names)]
  col_names <- sort(col_names)

  # Convert once to a matrix so that every threshold below can reuse it
  counts <- as.matrix(pivot)

  # One two-group table per threshold. Columns are selected by percentile
  # VALUE, not by position, so a percentile that is absent in this FUA cannot
  # shift the thresholds.
  indices <- seq(1, 99)
  list_of_databases <- lapply(indices, function(i) {
    data.frame(
      sumUpToCurrentPerc = rowSums(
        counts[, col_names <= i, drop = FALSE],
        na.rm = TRUE
      ),
      sumAboveCurrentPerc = rowSums(
        counts[, col_names > i, drop = FALSE],
        na.rm = TRUE
      )
    )
  })

  # Calculate SITI for every threshold (kept in a named list instead of
  # assign()/get() on numbered global variables)
  SITI_results <- lapply(list_of_databases, function(two_groups) {
    spseg(
      x = Spatial_grid_cells_sp,
      data = two_groups,
      method = "information",
      smoothing = "kernel",
      sigma = 4000,
      useC = FALSE
    )
  })
  names(SITI_results) <- paste0("SITI_result", indices)

  # Prepare them for being saved and plotted: extract slot `h` per threshold
  combined_data_list <- Map(
    function(result, i) {
      data.frame(Value = result@h, Group = as.character(i))
    },
    SITI_results,
    indices
  )

  # Make them be in a single database
  combined_data <- do.call(rbind, combined_data_list)

  # Convert the Group column to a factor with custom levels
  combined_data$Group <- factor(
    combined_data$Group,
    levels = as.character(indices)
  )

  # Save the data
  write.csv(
    combined_data,
    file = paste0("Wealth_per_capita_segregation_4000m_2011_", City, ".csv"),
    row.names = FALSE
  )
}


# Delete data for enabling further operations
rm(wealth2011)



### Calculate Spatial Information Theory Index for wealth segregation in 2010 ###

# Load the data
wealth2010 <- read.csv("raw_data/database_2010.csv") # only if not already loaded

# Define all possible FUAs
cities <- c(
  "'s-Gravenhage", "'s-Hertogenbosch", "Alkmaar", "Almelo",
  "Alphen aan den Rijn", "Amersfoort", "Amsterdam", "Apeldoorn", "Arnhem",
  "Assen", "Bergen op Zoom", "Breda", "Deventer", "Ede", "Eindhoven",
  "Enschede", "Gouda", "Groningen", "Heerlen", "Leeuwarden", "Leiden",
  "Lelystad", "Maastricht", "Middelburg", "Nijmegen", "Oss", "Roosendaal",
  "Rotterdam", "Sittard-Geleen", "Soest", "Tilburg", "Utrecht", "Veenendaal",
  "Venlo", "Zwolle"
)

# Build the household table used by the city loop in one pipeline
wealth2010 <- wealth2010 %>%
  # Delete missing values of wealth (coded 99999999999). Unlike `x[cond, ]`,
  # filter() drops rows whose condition is NA instead of turning them into
  # all-NA rows.
  filter(VEHW1000VERH != 99999999999) %>%
  # Calculate wealth per capita in each household
  # NOTE(review): BVRAHL is presumably the household size - confirm.
  mutate(Wealth_per_capita = VEHW1000VERH / BVRAHL) %>%
  # A divisor of 0 or NA gives Inf/NaN/NA, which cannot be ranked in a
  # meaningful way; leave those households out before computing percentiles
  filter(is.finite(Wealth_per_capita)) %>%
  # Order values according to their wealth within every FUA
  arrange(FUA, Wealth_per_capita) %>%
  # Calculate the wealth percentile rank every household belongs to in their
  # FUA; ungroup afterwards so later verbs do not run per FUA and select()
  # does not silently re-add the FUA column
  group_by(FUA) %>%
  mutate(Wealth_per_capita_percentile = ntile(Wealth_per_capita, 100)) %>%
  ungroup() %>%
  # Clean rows of missing data (no grid cell recorded)
  filter(VRLVIERKANT100M != "----------") %>%
  # Correct coordinates: append "00" to the easting and northing digits, i.e.
  # multiply both by 100.
  # NOTE(review): presumably converts 100 m units into metres - confirm.
  mutate(
    VRLVIERKANT100M = gsub(
      "(E)(\\d+)(N)(\\d+)", "\\1\\200\\3\\400", VRLVIERKANT100M
    )
  )

# Loop over each city (sigma = 4000m)
#
# For every FUA: count households per grid cell and wealth percentile, split
# the population at each percentile threshold 1..99 into "at or below" and
# "above", compute the spatial information theory index (slot `h` of the
# spseg() result) for every split, and write the 99 values to one CSV file.
for (city in cities) {
  # Set the variable city for the given city
  City <- city

  # Households of the FUA we are interested in. ungroup() keeps select() from
  # silently re-adding a FUA grouping column; households without a percentile
  # cannot be placed on either side of a threshold, so they are left out
  # (otherwise they would end up in an "NA" column counted as "above").
  df <- wealth2010 %>%
    ungroup() %>%
    filter(FUA == City, !is.na(Wealth_per_capita_percentile)) %>%
    select(VRLVIERKANT100M, Wealth_per_capita_percentile)

  # Skip (loudly) a FUA without usable households instead of failing further
  # down with an obscure subsetting error
  if (nrow(df) == 0) {
    warning(
      "No households found for FUA '", City,
      "' in 2010; no output file written.",
      call. = FALSE
    )
    next
  }

  # create a pivot table: one row per grid cell, one column per percentile
  pivot <- df %>%
    group_by(VRLVIERKANT100M, Wealth_per_capita_percentile) %>%
    summarise(Population = n(), .groups = "drop") %>%
    tidyr::pivot_wider(
      names_from = Wealth_per_capita_percentile,
      values_from = Population,
      values_fill = 0
    )

  # Parse the coordinates of the grid cells (same row order as `pivot`)
  grid_cells <- data.frame(
    x = as.numeric(sub("E", "", str_extract(pivot$VRLVIERKANT100M, "E\\d+"))),
    y = as.numeric(sub("N", "", str_extract(pivot$VRLVIERKANT100M, "N\\d+")))
  )

  # Create a spatial object for grid cells
  Spatial_grid_cells <- st_as_sf(grid_cells, coords = c("x", "y"), crs = 28992)
  coords <- st_coordinates(Spatial_grid_cells)

  # Create SpatialPoints object
  Spatial_grid_cells_sp <- SpatialPoints(
    coords,
    proj4string = CRS("+init=epsg:28992")
  )

  # Keep only the percentile columns, in ascending percentile order
  pivot$VRLVIERKANT100M <- NULL
  col_names <- as.numeric(colnames(pivot))
  pivot <- pivot[, order(col_names)]
  col_names <- sort(col_names)

  # Convert once to a matrix so that every threshold below can reuse it
  counts <- as.matrix(pivot)

  # One two-group table per threshold. Columns are selected by percentile
  # VALUE, not by position, so a percentile that is absent in this FUA cannot
  # shift the thresholds.
  indices <- seq(1, 99)
  list_of_databases <- lapply(indices, function(i) {
    data.frame(
      sumUpToCurrentPerc = rowSums(
        counts[, col_names <= i, drop = FALSE],
        na.rm = TRUE
      ),
      sumAboveCurrentPerc = rowSums(
        counts[, col_names > i, drop = FALSE],
        na.rm = TRUE
      )
    )
  })

  # Calculate SITI for every threshold (kept in a named list instead of
  # assign()/get() on numbered global variables)
  SITI_results <- lapply(list_of_databases, function(two_groups) {
    spseg(
      x = Spatial_grid_cells_sp,
      data = two_groups,
      method = "information",
      smoothing = "kernel",
      sigma = 4000,
      useC = FALSE
    )
  })
  names(SITI_results) <- paste0("SITI_result", indices)

  # Prepare them for being saved and plotted: extract slot `h` per threshold
  combined_data_list <- Map(
    function(result, i) {
      data.frame(Value = result@h, Group = as.character(i))
    },
    SITI_results,
    indices
  )

  # Make them be in a single database
  combined_data <- do.call(rbind, combined_data_list)

  # Convert the Group column to a factor with custom levels
  combined_data$Group <- factor(
    combined_data$Group,
    levels = as.character(indices)
  )

  # Save the data
  write.csv(
    combined_data,
    file = paste0("Wealth_per_capita_segregation_4000m_2010_", City, ".csv"),
    row.names = FALSE
  )
}


# Delete data for enabling further operations
rm(wealth2010)




### Calculate Spatial Information Theory Index for wealth segregation in 2009 ###

# Load the data
wealth2009 <- read.csv("raw_data/database_2009.csv") # only if not already loaded

# Define all possible FUAs
cities <- c(
  "'s-Gravenhage", "'s-Hertogenbosch", "Alkmaar", "Almelo",
  "Alphen aan den Rijn", "Amersfoort", "Amsterdam", "Apeldoorn", "Arnhem",
  "Assen", "Bergen op Zoom", "Breda", "Deventer", "Ede", "Eindhoven",
  "Enschede", "Gouda", "Groningen", "Heerlen", "Leeuwarden", "Leiden",
  "Lelystad", "Maastricht", "Middelburg", "Nijmegen", "Oss", "Roosendaal",
  "Rotterdam", "Sittard-Geleen", "Soest", "Tilburg", "Utrecht", "Veenendaal",
  "Venlo", "Zwolle"
)

# Build the household table used by the city loop in one pipeline
wealth2009 <- wealth2009 %>%
  # Delete missing values of wealth (coded 99999999999). Unlike `x[cond, ]`,
  # filter() drops rows whose condition is NA instead of turning them into
  # all-NA rows.
  filter(VEHW1000VERH != 99999999999) %>%
  # Calculate wealth per capita in each household
  # NOTE(review): BVRAHL is presumably the household size - confirm.
  mutate(Wealth_per_capita = VEHW1000VERH / BVRAHL) %>%
  # A divisor of 0 or NA gives Inf/NaN/NA, which cannot be ranked in a
  # meaningful way; leave those households out before computing percentiles
  filter(is.finite(Wealth_per_capita)) %>%
  # Order values according to their wealth within every FUA
  arrange(FUA, Wealth_per_capita) %>%
  # Calculate the wealth percentile rank every household belongs to in their
  # FUA; ungroup afterwards so later verbs do not run per FUA and select()
  # does not silently re-add the FUA column
  group_by(FUA) %>%
  mutate(Wealth_per_capita_percentile = ntile(Wealth_per_capita, 100)) %>%
  ungroup() %>%
  # Clean rows of missing data (no grid cell recorded)
  filter(VRLVIERKANT100M != "----------") %>%
  # Correct coordinates: append "00" to the easting and northing digits, i.e.
  # multiply both by 100.
  # NOTE(review): presumably converts 100 m units into metres - confirm.
  mutate(
    VRLVIERKANT100M = gsub(
      "(E)(\\d+)(N)(\\d+)", "\\1\\200\\3\\400", VRLVIERKANT100M
    )
  )

# Loop over each city (sigma = 4000m)
#
# For every FUA: count households per grid cell and wealth percentile, split
# the population at each percentile threshold 1..99 into "at or below" and
# "above", compute the spatial information theory index (slot `h` of the
# spseg() result) for every split, and write the 99 values to one CSV file.
for (city in cities) {
  # Set the variable city for the given city
  City <- city

  # Households of the FUA we are interested in. ungroup() keeps select() from
  # silently re-adding a FUA grouping column; households without a percentile
  # cannot be placed on either side of a threshold, so they are left out
  # (otherwise they would end up in an "NA" column counted as "above").
  df <- wealth2009 %>%
    ungroup() %>%
    filter(FUA == City, !is.na(Wealth_per_capita_percentile)) %>%
    select(VRLVIERKANT100M, Wealth_per_capita_percentile)

  # Skip (loudly) a FUA without usable households instead of failing further
  # down with an obscure subsetting error
  if (nrow(df) == 0) {
    warning(
      "No households found for FUA '", City,
      "' in 2009; no output file written.",
      call. = FALSE
    )
    next
  }

  # create a pivot table: one row per grid cell, one column per percentile
  pivot <- df %>%
    group_by(VRLVIERKANT100M, Wealth_per_capita_percentile) %>%
    summarise(Population = n(), .groups = "drop") %>%
    tidyr::pivot_wider(
      names_from = Wealth_per_capita_percentile,
      values_from = Population,
      values_fill = 0
    )

  # Parse the coordinates of the grid cells (same row order as `pivot`)
  grid_cells <- data.frame(
    x = as.numeric(sub("E", "", str_extract(pivot$VRLVIERKANT100M, "E\\d+"))),
    y = as.numeric(sub("N", "", str_extract(pivot$VRLVIERKANT100M, "N\\d+")))
  )

  # Create a spatial object for grid cells
  Spatial_grid_cells <- st_as_sf(grid_cells, coords = c("x", "y"), crs = 28992)
  coords <- st_coordinates(Spatial_grid_cells)

  # Create SpatialPoints object
  Spatial_grid_cells_sp <- SpatialPoints(
    coords,
    proj4string = CRS("+init=epsg:28992")
  )

  # Keep only the percentile columns, in ascending percentile order
  pivot$VRLVIERKANT100M <- NULL
  col_names <- as.numeric(colnames(pivot))
  pivot <- pivot[, order(col_names)]
  col_names <- sort(col_names)

  # Convert once to a matrix so that every threshold below can reuse it
  counts <- as.matrix(pivot)

  # One two-group table per threshold. Columns are selected by percentile
  # VALUE, not by position, so a percentile that is absent in this FUA cannot
  # shift the thresholds.
  indices <- seq(1, 99)
  list_of_databases <- lapply(indices, function(i) {
    data.frame(
      sumUpToCurrentPerc = rowSums(
        counts[, col_names <= i, drop = FALSE],
        na.rm = TRUE
      ),
      sumAboveCurrentPerc = rowSums(
        counts[, col_names > i, drop = FALSE],
        na.rm = TRUE
      )
    )
  })

  # Calculate SITI for every threshold (kept in a named list instead of
  # assign()/get() on numbered global variables)
  SITI_results <- lapply(list_of_databases, function(two_groups) {
    spseg(
      x = Spatial_grid_cells_sp,
      data = two_groups,
      method = "information",
      smoothing = "kernel",
      sigma = 4000,
      useC = FALSE
    )
  })
  names(SITI_results) <- paste0("SITI_result", indices)

  # Prepare them for being saved and plotted: extract slot `h` per threshold
  combined_data_list <- Map(
    function(result, i) {
      data.frame(Value = result@h, Group = as.character(i))
    },
    SITI_results,
    indices
  )

  # Make them be in a single database
  combined_data <- do.call(rbind, combined_data_list)

  # Convert the Group column to a factor with custom levels
  combined_data$Group <- factor(
    combined_data$Group,
    levels = as.character(indices)
  )

  # Save the data
  write.csv(
    combined_data,
    file = paste0("Wealth_per_capita_segregation_4000m_2009_", City, ".csv"),
    row.names = FALSE
  )
}

# Delete data for enabling further operations
rm(wealth2009)




### Calculate Spatial Information Theory Index for wealth segregation in 2008 ###

# Load the data
wealth2008 <- read.csv("raw_data/database_2008.csv") # only if not already loaded

# Define all possible FUAs
cities <- c(
  "'s-Gravenhage", "'s-Hertogenbosch", "Alkmaar", "Almelo",
  "Alphen aan den Rijn", "Amersfoort", "Amsterdam", "Apeldoorn", "Arnhem",
  "Assen", "Bergen op Zoom", "Breda", "Deventer", "Ede", "Eindhoven",
  "Enschede", "Gouda", "Groningen", "Heerlen", "Leeuwarden", "Leiden",
  "Lelystad", "Maastricht", "Middelburg", "Nijmegen", "Oss", "Roosendaal",
  "Rotterdam", "Sittard-Geleen", "Soest", "Tilburg", "Utrecht", "Veenendaal",
  "Venlo", "Zwolle"
)

# Build the household table used by the city loop in one pipeline
wealth2008 <- wealth2008 %>%
  # Delete missing values of wealth (coded 99999999999). Unlike `x[cond, ]`,
  # filter() drops rows whose condition is NA instead of turning them into
  # all-NA rows.
  filter(VEHW1000VERH != 99999999999) %>%
  # Calculate wealth per capita in each household
  # NOTE(review): BVRAHL is presumably the household size - confirm.
  mutate(Wealth_per_capita = VEHW1000VERH / BVRAHL) %>%
  # A divisor of 0 or NA gives Inf/NaN/NA, which cannot be ranked in a
  # meaningful way; leave those households out before computing percentiles
  filter(is.finite(Wealth_per_capita)) %>%
  # Order values according to their wealth within every FUA
  arrange(FUA, Wealth_per_capita) %>%
  # Calculate the wealth percentile rank every household belongs to in their
  # FUA; ungroup afterwards so later verbs do not run per FUA and select()
  # does not silently re-add the FUA column
  group_by(FUA) %>%
  mutate(Wealth_per_capita_percentile = ntile(Wealth_per_capita, 100)) %>%
  ungroup() %>%
  # Clean rows of missing data (no grid cell recorded)
  filter(VRLVIERKANT100M != "----------") %>%
  # Correct coordinates: append "00" to the easting and northing digits, i.e.
  # multiply both by 100.
  # NOTE(review): presumably converts 100 m units into metres - confirm.
  mutate(
    VRLVIERKANT100M = gsub(
      "(E)(\\d+)(N)(\\d+)", "\\1\\200\\3\\400", VRLVIERKANT100M
    )
  )

# Loop over each city (sigma = 4000m)
#
# For every FUA: count households per grid cell and wealth percentile, split
# the population at each percentile threshold 1..99 into "at or below" and
# "above", compute the spatial information theory index (slot `h` of the
# spseg() result) for every split, and write the 99 values to one CSV file.
for (city in cities) {
  # Set the variable city for the given city
  City <- city

  # Households of the FUA we are interested in. ungroup() keeps select() from
  # silently re-adding a FUA grouping column; households without a percentile
  # cannot be placed on either side of a threshold, so they are left out
  # (otherwise they would end up in an "NA" column counted as "above").
  df <- wealth2008 %>%
    ungroup() %>%
    filter(FUA == City, !is.na(Wealth_per_capita_percentile)) %>%
    select(VRLVIERKANT100M, Wealth_per_capita_percentile)

  # Skip (loudly) a FUA without usable households instead of failing further
  # down with an obscure subsetting error
  if (nrow(df) == 0) {
    warning(
      "No households found for FUA '", City,
      "' in 2008; no output file written.",
      call. = FALSE
    )
    next
  }

  # create a pivot table: one row per grid cell, one column per percentile
  pivot <- df %>%
    group_by(VRLVIERKANT100M, Wealth_per_capita_percentile) %>%
    summarise(Population = n(), .groups = "drop") %>%
    tidyr::pivot_wider(
      names_from = Wealth_per_capita_percentile,
      values_from = Population,
      values_fill = 0
    )

  # Parse the coordinates of the grid cells (same row order as `pivot`)
  grid_cells <- data.frame(
    x = as.numeric(sub("E", "", str_extract(pivot$VRLVIERKANT100M, "E\\d+"))),
    y = as.numeric(sub("N", "", str_extract(pivot$VRLVIERKANT100M, "N\\d+")))
  )

  # Create a spatial object for grid cells
  Spatial_grid_cells <- st_as_sf(grid_cells, coords = c("x", "y"), crs = 28992)
  coords <- st_coordinates(Spatial_grid_cells)

  # Create SpatialPoints object
  Spatial_grid_cells_sp <- SpatialPoints(
    coords,
    proj4string = CRS("+init=epsg:28992")
  )

  # Keep only the percentile columns, in ascending percentile order
  pivot$VRLVIERKANT100M <- NULL
  col_names <- as.numeric(colnames(pivot))
  pivot <- pivot[, order(col_names)]
  col_names <- sort(col_names)

  # Convert once to a matrix so that every threshold below can reuse it
  counts <- as.matrix(pivot)

  # One two-group table per threshold. Columns are selected by percentile
  # VALUE, not by position, so a percentile that is absent in this FUA cannot
  # shift the thresholds.
  indices <- seq(1, 99)
  list_of_databases <- lapply(indices, function(i) {
    data.frame(
      sumUpToCurrentPerc = rowSums(
        counts[, col_names <= i, drop = FALSE],
        na.rm = TRUE
      ),
      sumAboveCurrentPerc = rowSums(
        counts[, col_names > i, drop = FALSE],
        na.rm = TRUE
      )
    )
  })

  # Calculate SITI for every threshold (kept in a named list instead of
  # assign()/get() on numbered global variables)
  SITI_results <- lapply(list_of_databases, function(two_groups) {
    spseg(
      x = Spatial_grid_cells_sp,
      data = two_groups,
      method = "information",
      smoothing = "kernel",
      sigma = 4000,
      useC = FALSE
    )
  })
  names(SITI_results) <- paste0("SITI_result", indices)

  # Prepare them for being saved and plotted: extract slot `h` per threshold
  combined_data_list <- Map(
    function(result, i) {
      data.frame(Value = result@h, Group = as.character(i))
    },
    SITI_results,
    indices
  )

  # Make them be in a single database
  combined_data <- do.call(rbind, combined_data_list)

  # Convert the Group column to a factor with custom levels
  combined_data$Group <- factor(
    combined_data$Group,
    levels = as.character(indices)
  )

  # Save the data
  write.csv(
    combined_data,
    file = paste0("Wealth_per_capita_segregation_4000m_2008_", City, ".csv"),
    row.names = FALSE
  )
}

# Delete data for enabling further operations
rm(wealth2008)




### Calculate Spatial Information Theory Index for wealth segregation in 2007 ###

# Load the data
wealth2007 <- read.csv("raw_data/database_2007.csv") # only if not already loaded

# Define all possible FUAs
cities <- c(
  "'s-Gravenhage", "'s-Hertogenbosch", "Alkmaar", "Almelo",
  "Alphen aan den Rijn", "Amersfoort", "Amsterdam", "Apeldoorn", "Arnhem",
  "Assen", "Bergen op Zoom", "Breda", "Deventer", "Ede", "Eindhoven",
  "Enschede", "Gouda", "Groningen", "Heerlen", "Leeuwarden", "Leiden",
  "Lelystad", "Maastricht", "Middelburg", "Nijmegen", "Oss", "Roosendaal",
  "Rotterdam", "Sittard-Geleen", "Soest", "Tilburg", "Utrecht", "Veenendaal",
  "Venlo", "Zwolle"
)

# Build the household table used by the city loop in one pipeline
wealth2007 <- wealth2007 %>%
  # Delete missing values of wealth (coded 99999999999). Unlike `x[cond, ]`,
  # filter() drops rows whose condition is NA instead of turning them into
  # all-NA rows.
  filter(VEHW1000VERH != 99999999999) %>%
  # Calculate wealth per capita in each household
  # NOTE(review): BVRAHL is presumably the household size - confirm.
  mutate(Wealth_per_capita = VEHW1000VERH / BVRAHL) %>%
  # A divisor of 0 or NA gives Inf/NaN/NA, which cannot be ranked in a
  # meaningful way; leave those households out before computing percentiles
  filter(is.finite(Wealth_per_capita)) %>%
  # Order values according to their wealth within every FUA
  arrange(FUA, Wealth_per_capita) %>%
  # Calculate the wealth percentile rank every household belongs to in their
  # FUA; ungroup afterwards so later verbs do not run per FUA and select()
  # does not silently re-add the FUA column
  group_by(FUA) %>%
  mutate(Wealth_per_capita_percentile = ntile(Wealth_per_capita, 100)) %>%
  ungroup() %>%
  # Clean rows of missing data (no grid cell recorded)
  filter(VRLVIERKANT100M != "----------") %>%
  # Correct coordinates: append "00" to the easting and northing digits, i.e.
  # multiply both by 100.
  # NOTE(review): presumably converts 100 m units into metres - confirm.
  mutate(
    VRLVIERKANT100M = gsub(
      "(E)(\\d+)(N)(\\d+)", "\\1\\200\\3\\400", VRLVIERKANT100M
    )
  )

# Loop over each city (sigma = 4000m)
#
# For every FUA: count households per grid cell and wealth percentile, split
# the population at each percentile threshold 1..99 into "at or below" and
# "above", compute the spatial information theory index (slot `h` of the
# spseg() result) for every split, and write the 99 values to one CSV file.
for (city in cities) {
  # Set the variable city for the given city
  City <- city

  # Households of the FUA we are interested in. ungroup() keeps select() from
  # silently re-adding a FUA grouping column; households without a percentile
  # cannot be placed on either side of a threshold, so they are left out
  # (otherwise they would end up in an "NA" column counted as "above").
  df <- wealth2007 %>%
    ungroup() %>%
    filter(FUA == City, !is.na(Wealth_per_capita_percentile)) %>%
    select(VRLVIERKANT100M, Wealth_per_capita_percentile)

  # Skip (loudly) a FUA without usable households instead of failing further
  # down with an obscure subsetting error
  if (nrow(df) == 0) {
    warning(
      "No households found for FUA '", City,
      "' in 2007; no output file written.",
      call. = FALSE
    )
    next
  }

  # create a pivot table: one row per grid cell, one column per percentile
  pivot <- df %>%
    group_by(VRLVIERKANT100M, Wealth_per_capita_percentile) %>%
    summarise(Population = n(), .groups = "drop") %>%
    tidyr::pivot_wider(
      names_from = Wealth_per_capita_percentile,
      values_from = Population,
      values_fill = 0
    )

  # Parse the coordinates of the grid cells (same row order as `pivot`)
  grid_cells <- data.frame(
    x = as.numeric(sub("E", "", str_extract(pivot$VRLVIERKANT100M, "E\\d+"))),
    y = as.numeric(sub("N", "", str_extract(pivot$VRLVIERKANT100M, "N\\d+")))
  )

  # Create a spatial object for grid cells
  Spatial_grid_cells <- st_as_sf(grid_cells, coords = c("x", "y"), crs = 28992)
  coords <- st_coordinates(Spatial_grid_cells)

  # Create SpatialPoints object
  Spatial_grid_cells_sp <- SpatialPoints(
    coords,
    proj4string = CRS("+init=epsg:28992")
  )

  # Keep only the percentile columns, in ascending percentile order
  pivot$VRLVIERKANT100M <- NULL
  col_names <- as.numeric(colnames(pivot))
  pivot <- pivot[, order(col_names)]
  col_names <- sort(col_names)

  # Convert once to a matrix so that every threshold below can reuse it
  counts <- as.matrix(pivot)

  # One two-group table per threshold. Columns are selected by percentile
  # VALUE, not by position, so a percentile that is absent in this FUA cannot
  # shift the thresholds.
  indices <- seq(1, 99)
  list_of_databases <- lapply(indices, function(i) {
    data.frame(
      sumUpToCurrentPerc = rowSums(
        counts[, col_names <= i, drop = FALSE],
        na.rm = TRUE
      ),
      sumAboveCurrentPerc = rowSums(
        counts[, col_names > i, drop = FALSE],
        na.rm = TRUE
      )
    )
  })

  # Calculate SITI for every threshold (kept in a named list instead of
  # assign()/get() on numbered global variables)
  SITI_results <- lapply(list_of_databases, function(two_groups) {
    spseg(
      x = Spatial_grid_cells_sp,
      data = two_groups,
      method = "information",
      smoothing = "kernel",
      sigma = 4000,
      useC = FALSE
    )
  })
  names(SITI_results) <- paste0("SITI_result", indices)

  # Prepare them for being saved and plotted: extract slot `h` per threshold
  combined_data_list <- Map(
    function(result, i) {
      data.frame(Value = result@h, Group = as.character(i))
    },
    SITI_results,
    indices
  )

  # Make them be in a single database
  combined_data <- do.call(rbind, combined_data_list)

  # Convert the Group column to a factor with custom levels
  combined_data$Group <- factor(
    combined_data$Group,
    levels = as.character(indices)
  )

  # Save the data
  write.csv(
    combined_data,
    file = paste0("Wealth_per_capita_segregation_4000m_2007_", City, ".csv"),
    row.names = FALSE
  )
}

# Delete data for enabling further operations
rm(wealth2007)



### Calculate Spatial Information Theory Index for wealth segregation in 2006 ###

# Load the data
wealth2006 <- read.csv("raw_data/database_2006.csv") # only if not already loaded

# Define all possible FUAs
cities <- c(
  "'s-Gravenhage", "'s-Hertogenbosch", "Alkmaar", "Almelo",
  "Alphen aan den Rijn", "Amersfoort", "Amsterdam", "Apeldoorn", "Arnhem",
  "Assen", "Bergen op Zoom", "Breda", "Deventer", "Ede", "Eindhoven",
  "Enschede", "Gouda", "Groningen", "Heerlen", "Leeuwarden", "Leiden",
  "Lelystad", "Maastricht", "Middelburg", "Nijmegen", "Oss", "Roosendaal",
  "Rotterdam", "Sittard-Geleen", "Soest", "Tilburg", "Utrecht", "Veenendaal",
  "Venlo", "Zwolle"
)

# Build the household table used by the city loop in one pipeline
wealth2006 <- wealth2006 %>%
  # Delete missing values of wealth (coded 99999999999). Unlike `x[cond, ]`,
  # filter() drops rows whose condition is NA instead of turning them into
  # all-NA rows.
  filter(VEHW1000VERH != 99999999999) %>%
  # Calculate wealth per capita in each household
  # NOTE(review): BVRAHL is presumably the household size - confirm.
  mutate(Wealth_per_capita = VEHW1000VERH / BVRAHL) %>%
  # A divisor of 0 or NA gives Inf/NaN/NA, which cannot be ranked in a
  # meaningful way; leave those households out before computing percentiles
  filter(is.finite(Wealth_per_capita)) %>%
  # Order values according to their wealth within every FUA
  arrange(FUA, Wealth_per_capita) %>%
  # Calculate the wealth percentile rank every household belongs to in their
  # FUA; ungroup afterwards so later verbs do not run per FUA and select()
  # does not silently re-add the FUA column
  group_by(FUA) %>%
  mutate(Wealth_per_capita_percentile = ntile(Wealth_per_capita, 100)) %>%
  ungroup() %>%
  # Clean rows of missing data (no grid cell recorded)
  filter(VRLVIERKANT100M != "----------") %>%
  # Correct coordinates: append "00" to the easting and northing digits, i.e.
  # multiply both by 100.
  # NOTE(review): presumably converts 100 m units into metres - confirm.
  mutate(
    VRLVIERKANT100M = gsub(
      "(E)(\\d+)(N)(\\d+)", "\\1\\200\\3\\400", VRLVIERKANT100M
    )
  )

# Loop over each city (sigma = 4000m)
#
# For every FUA: count households per grid cell and wealth percentile, split
# the population at each percentile threshold 1..99 into "at or below" and
# "above", compute the spatial information theory index (slot `h` of the
# spseg() result) for every split, and write the 99 values to one CSV file.
for (city in cities) {
  # Set the variable city for the given city
  City <- city

  # Households of the FUA we are interested in. ungroup() keeps select() from
  # silently re-adding a FUA grouping column; households without a percentile
  # cannot be placed on either side of a threshold, so they are left out
  # (otherwise they would end up in an "NA" column counted as "above").
  df <- wealth2006 %>%
    ungroup() %>%
    filter(FUA == City, !is.na(Wealth_per_capita_percentile)) %>%
    select(VRLVIERKANT100M, Wealth_per_capita_percentile)

  # Skip (loudly) a FUA without usable households instead of failing further
  # down with an obscure subsetting error
  if (nrow(df) == 0) {
    warning(
      "No households found for FUA '", City,
      "' in 2006; no output file written.",
      call. = FALSE
    )
    next
  }

  # create a pivot table: one row per grid cell, one column per percentile
  pivot <- df %>%
    group_by(VRLVIERKANT100M, Wealth_per_capita_percentile) %>%
    summarise(Population = n(), .groups = "drop") %>%
    tidyr::pivot_wider(
      names_from = Wealth_per_capita_percentile,
      values_from = Population,
      values_fill = 0
    )

  # Parse the coordinates of the grid cells (same row order as `pivot`)
  grid_cells <- data.frame(
    x = as.numeric(sub("E", "", str_extract(pivot$VRLVIERKANT100M, "E\\d+"))),
    y = as.numeric(sub("N", "", str_extract(pivot$VRLVIERKANT100M, "N\\d+")))
  )

  # Create a spatial object for grid cells
  Spatial_grid_cells <- st_as_sf(grid_cells, coords = c("x", "y"), crs = 28992)
  coords <- st_coordinates(Spatial_grid_cells)

  # Create SpatialPoints object
  Spatial_grid_cells_sp <- SpatialPoints(
    coords,
    proj4string = CRS("+init=epsg:28992")
  )

  # Keep only the percentile columns, in ascending percentile order
  pivot$VRLVIERKANT100M <- NULL
  col_names <- as.numeric(colnames(pivot))
  pivot <- pivot[, order(col_names)]
  col_names <- sort(col_names)

  # Convert once to a matrix so that every threshold below can reuse it
  counts <- as.matrix(pivot)

  # One two-group table per threshold. Columns are selected by percentile
  # VALUE, not by position, so a percentile that is absent in this FUA cannot
  # shift the thresholds.
  indices <- seq(1, 99)
  list_of_databases <- lapply(indices, function(i) {
    data.frame(
      sumUpToCurrentPerc = rowSums(
        counts[, col_names <= i, drop = FALSE],
        na.rm = TRUE
      ),
      sumAboveCurrentPerc = rowSums(
        counts[, col_names > i, drop = FALSE],
        na.rm = TRUE
      )
    )
  })

  # Calculate SITI for every threshold (kept in a named list instead of
  # assign()/get() on numbered global variables)
  SITI_results <- lapply(list_of_databases, function(two_groups) {
    spseg(
      x = Spatial_grid_cells_sp,
      data = two_groups,
      method = "information",
      smoothing = "kernel",
      sigma = 4000,
      useC = FALSE
    )
  })
  names(SITI_results) <- paste0("SITI_result", indices)

  # Prepare them for being saved and plotted: extract slot `h` per threshold
  combined_data_list <- Map(
    function(result, i) {
      data.frame(Value = result@h, Group = as.character(i))
    },
    SITI_results,
    indices
  )

  # Make them be in a single database
  combined_data <- do.call(rbind, combined_data_list)

  # Convert the Group column to a factor with custom levels
  combined_data$Group <- factor(
    combined_data$Group,
    levels = as.character(indices)
  )

  # Save the data
  write.csv(
    combined_data,
    file = paste0("Wealth_per_capita_segregation_4000m_2006_", City, ".csv"),
    row.names = FALSE
  )
}

# Delete data for enabling further operations
rm(wealth2006)

