##### Calculating segregation based on movable wealth #####
# 01_load_and_process_data script
# 28/10/2024
# Relevant info:
#VEHW1000VERH, # Household total assets
#VEHW1110FINH, # Financial assets
#VEHW1120ONRH, # value of real estate
#VEHW1121WONH, # Value of the own owned home
#VEHW1130ONDH, # value of the entrepreneurial capital
#VEHW1140ABEH, # value of significant interests of a household (more than 5% of a company)
#VEHW1150OVEH, # Other household assets (e.g. cash)
#VEHW1200STOH, # Household debts
#VEHW1210SHYH, # mortgage debt
#####

# load the required packages
library(haven)
library(ineq)
library(DescTools)
library(tidyverse)
library(OasisR)
library(seg)
library(sf)
library(sp)

# Do not use scientific notation: `scipen` is the penalty R applies when choosing
# between fixed and scientific notation, so a large positive value makes printed
# and written numbers (e.g. the 11-digit wealth codes) come out in fixed notation.
options(scipen = 9999999)

#### Local environment set at a 500m radius ####
### Calculate Spatial Information Theory Index for wealth segregation in 2022 ###

# Load the 2022 household data (always re-read from disk)
wealth <- read.csv("raw_data/database_2022.csv")

# Drop households whose total assets carry the missing-value code 99999999999.
# filter() also drops rows where the comparison is NA, whereas the base form
# `wealth[cond, ]` would turn every NA condition into an all-NA row.
wealth <- wealth %>%
  filter(VEHW1000VERH != 99999999999)

# Movable wealth = financial assets + entrepreneurial capital +
# significant interests + other assets (real estate is left out)
wealth$Movable_wealth <- wealth$VEHW1110FINH + wealth$VEHW1130ONDH +
  wealth$VEHW1140ABEH + wealth$VEHW1150OVEH

# Movable wealth per capita
# (INHAHL is presumably the number of household members - TODO confirm)
wealth$Movable_wealth_per_capita <- wealth$Movable_wealth / wealth$INHAHL

# Define all possible FUAs
cities <- c(
  "'s-Gravenhage", "'s-Hertogenbosch", "Alkmaar", "Almelo",
  "Alphen aan den Rijn", "Amersfoort", "Amsterdam", "Apeldoorn", "Arnhem",
  "Assen", "Bergen op Zoom", "Breda", "Deventer", "Ede", "Eindhoven",
  "Enschede", "Gouda", "Groningen", "Heerlen", "Leeuwarden", "Leiden",
  "Lelystad", "Maastricht", "Middelburg", "Nijmegen", "Oss", "Roosendaal",
  "Rotterdam", "Sittard-Geleen", "Soest", "Tilburg", "Utrecht", "Veenendaal",
  "Venlo", "Zwolle"
)

# Keep only households with a finite per-capita value. A zero or missing
# INHAHL, or a missing asset component, yields Inf/NaN/NA, which cannot be
# ranked meaningfully and would end up as an "NA" percentile downstream.
wealth <- wealth %>%
  filter(is.finite(Movable_wealth_per_capita))

# Percentile rank (1-100) of every household within its FUA.
# NOTE(review): the ranking previously used total Movable_wealth although the
# column and the output files are named "per capita" and the per-capita value
# was computed but never used. It now ranks on Movable_wealth_per_capita -
# confirm this is the intended measure.
# arrange() fixes the row order so ties are split reproducibly by ntile();
# ungroup() stops the FUA grouping from leaking into later select()/count().
wealth <- wealth %>%
  arrange(FUA, Movable_wealth_per_capita) %>%
  group_by(FUA) %>%
  mutate(Wealth_per_capita_percentile = ntile(Movable_wealth_per_capita, 100)) %>%
  ungroup()

# Drop households without a grid cell ("----------" marks a missing location)
wealth <- wealth %>%
  filter(VRLVIERKANT100M != "----------")

# Rescale the grid-cell coordinates: in the replacement, "\\200" is
# backreference 2 followed by a literal "00" (likewise "\\400"), so both the
# E and the N number are multiplied by 100 - presumably converting 100 m grid
# units to metres (the earlier comment said kilometres; confirm).
wealth <- wealth %>%
  mutate(VRLVIERKANT100M = gsub("(E)(\\d+)(N)(\\d+)","\\1\\200\\3\\400", VRLVIERKANT100M))

# Loop over each city (sigma = 500m)
# For every FUA: count households per grid cell and wealth percentile, then
# compute the spatial information theory index (slot `h` of the spseg() result)
# for each of the 99 binary splits "percentile <= p" versus "percentile > p",
# and write one CSV per city with columns Value (index) and Group (p).
n_percentiles <- 100L
thresholds <- seq_len(n_percentiles - 1L)

for (city in cities) {
  # Households of this FUA that have a percentile.
  # - ungroup(): a leftover FUA grouping on `wealth` must not leak into
  #   select()/count().
  # - !!city: forces the loop variable, even if the data has a `city` column.
  # - NA percentiles are dropped; they would otherwise become an "NA" column
  #   that is counted as "above" for every threshold.
  city_households <- wealth %>%
    ungroup() %>%
    filter(FUA == !!city, !is.na(Wealth_per_capita_percentile)) %>%
    select(VRLVIERKANT100M, Wealth_per_capita_percentile)

  # An unknown or empty FUA used to abort the whole run with an obscure
  # spatial error; skip it loudly instead so the remaining cities still run.
  if (nrow(city_households) == 0) {
    warning("No households found for FUA '", city, "' in 2022; skipping.", call. = FALSE)
    next
  }

  # Pivot table: one row per grid cell, one column per percentile present
  pivot <- city_households %>%
    count(VRLVIERKANT100M, Wealth_per_capita_percentile, name = "Population") %>%
    tidyr::pivot_wider(names_from = Wealth_per_capita_percentile, values_from = Population, values_fill = 0)

  # Parse the E/N coordinates out of the grid-cell code (same row order as pivot)
  grid_cells <- data.frame(
    x = as.numeric(sub("E", "", str_extract(pivot$VRLVIERKANT100M, "E\\d+"))),
    y = as.numeric(sub("N", "", str_extract(pivot$VRLVIERKANT100M, "N\\d+")))
  )

  # Build the sp points object that spseg() takes as `x` (EPSG:28992)
  Spatial_grid_cells <- st_as_sf(grid_cells, coords = c("x", "y"), crs = 28992)
  coords <- st_coordinates(Spatial_grid_cells)
  Spatial_grid_cells_sp <- SpatialPoints(coords, proj4string = CRS("+init=epsg:28992"))

  # Counts aligned by percentile label rather than by column position: a
  # percentile that does not occur in this city gets an all-zero column, so
  # column p is always percentile p and there are always 100 columns.
  percentile_counts <- matrix(
    0,
    nrow = nrow(pivot), ncol = n_percentiles,
    dimnames = list(NULL, as.character(seq_len(n_percentiles)))
  )
  present <- intersect(colnames(percentile_counts), colnames(pivot))
  percentile_counts[, present] <- as.matrix(pivot[, present])
  cell_totals <- rowSums(percentile_counts)

  # One result row per threshold p
  siti_by_threshold <- lapply(thresholds, function(p) {
    # Households at or below percentile p, and above it, per grid cell
    below <- rowSums(percentile_counts[, seq_len(p), drop = FALSE])
    above <- cell_totals - below

    # If one side of the split is empty in this city the index is undefined
    if (sum(below) == 0 || sum(above) == 0) {
      return(data.frame(Value = NA_real_, Group = as.character(p)))
    }

    split_counts <- data.frame(sumUpToCurrentPerc = below, sumAboveCurrentPerc = above)

    # Kernel-smoothed local environments with sigma = 500
    result <- spseg(
      x = Spatial_grid_cells_sp, data = split_counts,
      method = "information", smoothing = "kernel", sigma = 500, useC = FALSE
    )
    data.frame(Value = result@h, Group = as.character(p))
  })

  # Make them be in a single database, with Group ordered 1..99
  combined_data <- do.call(rbind, siti_by_threshold)
  combined_data$Group <- factor(combined_data$Group, levels = as.character(thresholds))

  # Save the data
  write.csv(combined_data, file = paste0("Wealth_per_capita_excluding_real_estate_segregation_500m_2022_", city, ".csv"), row.names = FALSE)
}

# Delete data for enabling further operations
rm(wealth)




### Calculate Spatial Information Theory Index for wealth segregation in 2021 ###

# Load the 2021 household data (always re-read from disk)
wealth <- read.csv("raw_data/database_2021.csv")

# Drop households whose total assets carry the missing-value code 99999999999.
# filter() also drops rows where the comparison is NA, whereas the base form
# `wealth[cond, ]` would turn every NA condition into an all-NA row.
wealth <- wealth %>%
  filter(VEHW1000VERH != 99999999999)

# Movable wealth = financial assets + entrepreneurial capital +
# significant interests + other assets (real estate is left out)
wealth$Movable_wealth <- wealth$VEHW1110FINH + wealth$VEHW1130ONDH +
  wealth$VEHW1140ABEH + wealth$VEHW1150OVEH

# Movable wealth per capita
# (INHAHL is presumably the number of household members - TODO confirm)
wealth$Movable_wealth_per_capita <- wealth$Movable_wealth / wealth$INHAHL

# Define all possible FUAs
cities <- c(
  "'s-Gravenhage", "'s-Hertogenbosch", "Alkmaar", "Almelo",
  "Alphen aan den Rijn", "Amersfoort", "Amsterdam", "Apeldoorn", "Arnhem",
  "Assen", "Bergen op Zoom", "Breda", "Deventer", "Ede", "Eindhoven",
  "Enschede", "Gouda", "Groningen", "Heerlen", "Leeuwarden", "Leiden",
  "Lelystad", "Maastricht", "Middelburg", "Nijmegen", "Oss", "Roosendaal",
  "Rotterdam", "Sittard-Geleen", "Soest", "Tilburg", "Utrecht", "Veenendaal",
  "Venlo", "Zwolle"
)

# Keep only households with a finite per-capita value. A zero or missing
# INHAHL, or a missing asset component, yields Inf/NaN/NA, which cannot be
# ranked meaningfully and would end up as an "NA" percentile downstream.
wealth <- wealth %>%
  filter(is.finite(Movable_wealth_per_capita))

# Percentile rank (1-100) of every household within its FUA.
# NOTE(review): the ranking previously used total Movable_wealth although the
# column and the output files are named "per capita" and the per-capita value
# was computed but never used. It now ranks on Movable_wealth_per_capita -
# confirm this is the intended measure.
# arrange() fixes the row order so ties are split reproducibly by ntile();
# ungroup() stops the FUA grouping from leaking into later select()/count().
wealth <- wealth %>%
  arrange(FUA, Movable_wealth_per_capita) %>%
  group_by(FUA) %>%
  mutate(Wealth_per_capita_percentile = ntile(Movable_wealth_per_capita, 100)) %>%
  ungroup()

# Drop households without a grid cell ("----------" marks a missing location)
wealth <- wealth %>%
  filter(VRLVIERKANT100M != "----------")

# Rescale the grid-cell coordinates: in the replacement, "\\200" is
# backreference 2 followed by a literal "00" (likewise "\\400"), so both the
# E and the N number are multiplied by 100 - presumably converting 100 m grid
# units to metres (the earlier comment said kilometres; confirm).
wealth <- wealth %>%
  mutate(VRLVIERKANT100M = gsub("(E)(\\d+)(N)(\\d+)","\\1\\200\\3\\400", VRLVIERKANT100M))

# Loop over each city (sigma = 500m)
# For every FUA: count households per grid cell and wealth percentile, then
# compute the spatial information theory index (slot `h` of the spseg() result)
# for each of the 99 binary splits "percentile <= p" versus "percentile > p",
# and write one CSV per city with columns Value (index) and Group (p).
n_percentiles <- 100L
thresholds <- seq_len(n_percentiles - 1L)

for (city in cities) {
  # Households of this FUA that have a percentile.
  # - ungroup(): a leftover FUA grouping on `wealth` must not leak into
  #   select()/count().
  # - !!city: forces the loop variable, even if the data has a `city` column.
  # - NA percentiles are dropped; they would otherwise become an "NA" column
  #   that is counted as "above" for every threshold.
  city_households <- wealth %>%
    ungroup() %>%
    filter(FUA == !!city, !is.na(Wealth_per_capita_percentile)) %>%
    select(VRLVIERKANT100M, Wealth_per_capita_percentile)

  # An unknown or empty FUA used to abort the whole run with an obscure
  # spatial error; skip it loudly instead so the remaining cities still run.
  if (nrow(city_households) == 0) {
    warning("No households found for FUA '", city, "' in 2021; skipping.", call. = FALSE)
    next
  }

  # Pivot table: one row per grid cell, one column per percentile present
  pivot <- city_households %>%
    count(VRLVIERKANT100M, Wealth_per_capita_percentile, name = "Population") %>%
    tidyr::pivot_wider(names_from = Wealth_per_capita_percentile, values_from = Population, values_fill = 0)

  # Parse the E/N coordinates out of the grid-cell code (same row order as pivot)
  grid_cells <- data.frame(
    x = as.numeric(sub("E", "", str_extract(pivot$VRLVIERKANT100M, "E\\d+"))),
    y = as.numeric(sub("N", "", str_extract(pivot$VRLVIERKANT100M, "N\\d+")))
  )

  # Build the sp points object that spseg() takes as `x` (EPSG:28992)
  Spatial_grid_cells <- st_as_sf(grid_cells, coords = c("x", "y"), crs = 28992)
  coords <- st_coordinates(Spatial_grid_cells)
  Spatial_grid_cells_sp <- SpatialPoints(coords, proj4string = CRS("+init=epsg:28992"))

  # Counts aligned by percentile label rather than by column position: a
  # percentile that does not occur in this city gets an all-zero column, so
  # column p is always percentile p and there are always 100 columns.
  percentile_counts <- matrix(
    0,
    nrow = nrow(pivot), ncol = n_percentiles,
    dimnames = list(NULL, as.character(seq_len(n_percentiles)))
  )
  present <- intersect(colnames(percentile_counts), colnames(pivot))
  percentile_counts[, present] <- as.matrix(pivot[, present])
  cell_totals <- rowSums(percentile_counts)

  # One result row per threshold p
  siti_by_threshold <- lapply(thresholds, function(p) {
    # Households at or below percentile p, and above it, per grid cell
    below <- rowSums(percentile_counts[, seq_len(p), drop = FALSE])
    above <- cell_totals - below

    # If one side of the split is empty in this city the index is undefined
    if (sum(below) == 0 || sum(above) == 0) {
      return(data.frame(Value = NA_real_, Group = as.character(p)))
    }

    split_counts <- data.frame(sumUpToCurrentPerc = below, sumAboveCurrentPerc = above)

    # Kernel-smoothed local environments with sigma = 500
    result <- spseg(
      x = Spatial_grid_cells_sp, data = split_counts,
      method = "information", smoothing = "kernel", sigma = 500, useC = FALSE
    )
    data.frame(Value = result@h, Group = as.character(p))
  })

  # Make them be in a single database, with Group ordered 1..99
  combined_data <- do.call(rbind, siti_by_threshold)
  combined_data$Group <- factor(combined_data$Group, levels = as.character(thresholds))

  # Save the data
  write.csv(combined_data, file = paste0("Wealth_per_capita_excluding_real_estate_segregation_500m_2021_", city, ".csv"), row.names = FALSE)
}

# Delete data for enabling further operations
rm(wealth)




### Calculate Spatial Information Theory Index for wealth segregation in 2020 ###

# Load the 2020 household data (always re-read from disk)
wealth <- read.csv("raw_data/database_2020.csv")

# Drop households whose total assets carry the missing-value code 99999999999.
# filter() also drops rows where the comparison is NA, whereas the base form
# `wealth[cond, ]` would turn every NA condition into an all-NA row.
wealth <- wealth %>%
  filter(VEHW1000VERH != 99999999999)

# Movable wealth = financial assets + entrepreneurial capital +
# significant interests + other assets (real estate is left out)
wealth$Movable_wealth <- wealth$VEHW1110FINH + wealth$VEHW1130ONDH +
  wealth$VEHW1140ABEH + wealth$VEHW1150OVEH

# Movable wealth per capita
# (INHAHL is presumably the number of household members - TODO confirm)
wealth$Movable_wealth_per_capita <- wealth$Movable_wealth / wealth$INHAHL

# Define all possible FUAs
cities <- c(
  "'s-Gravenhage", "'s-Hertogenbosch", "Alkmaar", "Almelo",
  "Alphen aan den Rijn", "Amersfoort", "Amsterdam", "Apeldoorn", "Arnhem",
  "Assen", "Bergen op Zoom", "Breda", "Deventer", "Ede", "Eindhoven",
  "Enschede", "Gouda", "Groningen", "Heerlen", "Leeuwarden", "Leiden",
  "Lelystad", "Maastricht", "Middelburg", "Nijmegen", "Oss", "Roosendaal",
  "Rotterdam", "Sittard-Geleen", "Soest", "Tilburg", "Utrecht", "Veenendaal",
  "Venlo", "Zwolle"
)

# Keep only households with a finite per-capita value. A zero or missing
# INHAHL, or a missing asset component, yields Inf/NaN/NA, which cannot be
# ranked meaningfully and would end up as an "NA" percentile downstream.
wealth <- wealth %>%
  filter(is.finite(Movable_wealth_per_capita))

# Percentile rank (1-100) of every household within its FUA.
# NOTE(review): the ranking previously used total Movable_wealth although the
# column and the output files are named "per capita" and the per-capita value
# was computed but never used. It now ranks on Movable_wealth_per_capita -
# confirm this is the intended measure.
# arrange() fixes the row order so ties are split reproducibly by ntile();
# ungroup() stops the FUA grouping from leaking into later select()/count().
wealth <- wealth %>%
  arrange(FUA, Movable_wealth_per_capita) %>%
  group_by(FUA) %>%
  mutate(Wealth_per_capita_percentile = ntile(Movable_wealth_per_capita, 100)) %>%
  ungroup()

# Drop households without a grid cell ("----------" marks a missing location)
wealth <- wealth %>%
  filter(VRLVIERKANT100M != "----------")

# Rescale the grid-cell coordinates: in the replacement, "\\200" is
# backreference 2 followed by a literal "00" (likewise "\\400"), so both the
# E and the N number are multiplied by 100 - presumably converting 100 m grid
# units to metres (the earlier comment said kilometres; confirm).
wealth <- wealth %>%
  mutate(VRLVIERKANT100M = gsub("(E)(\\d+)(N)(\\d+)","\\1\\200\\3\\400", VRLVIERKANT100M))

# Loop over each city (sigma = 500m)
# For every FUA: count households per grid cell and wealth percentile, then
# compute the spatial information theory index (slot `h` of the spseg() result)
# for each of the 99 binary splits "percentile <= p" versus "percentile > p",
# and write one CSV per city with columns Value (index) and Group (p).
n_percentiles <- 100L
thresholds <- seq_len(n_percentiles - 1L)

for (city in cities) {
  # Households of this FUA that have a percentile.
  # - ungroup(): a leftover FUA grouping on `wealth` must not leak into
  #   select()/count().
  # - !!city: forces the loop variable, even if the data has a `city` column.
  # - NA percentiles are dropped; they would otherwise become an "NA" column
  #   that is counted as "above" for every threshold.
  city_households <- wealth %>%
    ungroup() %>%
    filter(FUA == !!city, !is.na(Wealth_per_capita_percentile)) %>%
    select(VRLVIERKANT100M, Wealth_per_capita_percentile)

  # An unknown or empty FUA used to abort the whole run with an obscure
  # spatial error; skip it loudly instead so the remaining cities still run.
  if (nrow(city_households) == 0) {
    warning("No households found for FUA '", city, "' in 2020; skipping.", call. = FALSE)
    next
  }

  # Pivot table: one row per grid cell, one column per percentile present
  pivot <- city_households %>%
    count(VRLVIERKANT100M, Wealth_per_capita_percentile, name = "Population") %>%
    tidyr::pivot_wider(names_from = Wealth_per_capita_percentile, values_from = Population, values_fill = 0)

  # Parse the E/N coordinates out of the grid-cell code (same row order as pivot)
  grid_cells <- data.frame(
    x = as.numeric(sub("E", "", str_extract(pivot$VRLVIERKANT100M, "E\\d+"))),
    y = as.numeric(sub("N", "", str_extract(pivot$VRLVIERKANT100M, "N\\d+")))
  )

  # Build the sp points object that spseg() takes as `x` (EPSG:28992)
  Spatial_grid_cells <- st_as_sf(grid_cells, coords = c("x", "y"), crs = 28992)
  coords <- st_coordinates(Spatial_grid_cells)
  Spatial_grid_cells_sp <- SpatialPoints(coords, proj4string = CRS("+init=epsg:28992"))

  # Counts aligned by percentile label rather than by column position: a
  # percentile that does not occur in this city gets an all-zero column, so
  # column p is always percentile p and there are always 100 columns.
  percentile_counts <- matrix(
    0,
    nrow = nrow(pivot), ncol = n_percentiles,
    dimnames = list(NULL, as.character(seq_len(n_percentiles)))
  )
  present <- intersect(colnames(percentile_counts), colnames(pivot))
  percentile_counts[, present] <- as.matrix(pivot[, present])
  cell_totals <- rowSums(percentile_counts)

  # One result row per threshold p
  siti_by_threshold <- lapply(thresholds, function(p) {
    # Households at or below percentile p, and above it, per grid cell
    below <- rowSums(percentile_counts[, seq_len(p), drop = FALSE])
    above <- cell_totals - below

    # If one side of the split is empty in this city the index is undefined
    if (sum(below) == 0 || sum(above) == 0) {
      return(data.frame(Value = NA_real_, Group = as.character(p)))
    }

    split_counts <- data.frame(sumUpToCurrentPerc = below, sumAboveCurrentPerc = above)

    # Kernel-smoothed local environments with sigma = 500
    result <- spseg(
      x = Spatial_grid_cells_sp, data = split_counts,
      method = "information", smoothing = "kernel", sigma = 500, useC = FALSE
    )
    data.frame(Value = result@h, Group = as.character(p))
  })

  # Make them be in a single database, with Group ordered 1..99
  combined_data <- do.call(rbind, siti_by_threshold)
  combined_data$Group <- factor(combined_data$Group, levels = as.character(thresholds))

  # Save the data
  write.csv(combined_data, file = paste0("Wealth_per_capita_excluding_real_estate_segregation_500m_2020_", city, ".csv"), row.names = FALSE)
}

# Delete data for enabling further operations
rm(wealth)


### Calculate Spatial Information Theory Index for wealth segregation in 2019 ###

# Load the 2019 household data (always re-read from disk)
wealth <- read.csv("raw_data/database_2019.csv")

# Drop households whose total assets carry the missing-value code 99999999999.
# filter() also drops rows where the comparison is NA, whereas the base form
# `wealth[cond, ]` would turn every NA condition into an all-NA row.
wealth <- wealth %>%
  filter(VEHW1000VERH != 99999999999)

# Movable wealth = financial assets + entrepreneurial capital +
# significant interests + other assets (real estate is left out)
wealth$Movable_wealth <- wealth$VEHW1110FINH + wealth$VEHW1130ONDH +
  wealth$VEHW1140ABEH + wealth$VEHW1150OVEH

# Movable wealth per capita
# (INHAHL is presumably the number of household members - TODO confirm)
wealth$Movable_wealth_per_capita <- wealth$Movable_wealth / wealth$INHAHL

# Define all possible FUAs
cities <- c(
  "'s-Gravenhage", "'s-Hertogenbosch", "Alkmaar", "Almelo",
  "Alphen aan den Rijn", "Amersfoort", "Amsterdam", "Apeldoorn", "Arnhem",
  "Assen", "Bergen op Zoom", "Breda", "Deventer", "Ede", "Eindhoven",
  "Enschede", "Gouda", "Groningen", "Heerlen", "Leeuwarden", "Leiden",
  "Lelystad", "Maastricht", "Middelburg", "Nijmegen", "Oss", "Roosendaal",
  "Rotterdam", "Sittard-Geleen", "Soest", "Tilburg", "Utrecht", "Veenendaal",
  "Venlo", "Zwolle"
)

# Keep only households with a finite per-capita value. A zero or missing
# INHAHL, or a missing asset component, yields Inf/NaN/NA, which cannot be
# ranked meaningfully and would end up as an "NA" percentile downstream.
wealth <- wealth %>%
  filter(is.finite(Movable_wealth_per_capita))

# Percentile rank (1-100) of every household within its FUA.
# NOTE(review): the ranking previously used total Movable_wealth although the
# column and the output files are named "per capita" and the per-capita value
# was computed but never used. It now ranks on Movable_wealth_per_capita -
# confirm this is the intended measure.
# arrange() fixes the row order so ties are split reproducibly by ntile();
# ungroup() stops the FUA grouping from leaking into later select()/count().
wealth <- wealth %>%
  arrange(FUA, Movable_wealth_per_capita) %>%
  group_by(FUA) %>%
  mutate(Wealth_per_capita_percentile = ntile(Movable_wealth_per_capita, 100)) %>%
  ungroup()

# Drop households without a grid cell ("----------" marks a missing location)
wealth <- wealth %>%
  filter(VRLVIERKANT100M != "----------")

# Rescale the grid-cell coordinates: in the replacement, "\\200" is
# backreference 2 followed by a literal "00" (likewise "\\400"), so both the
# E and the N number are multiplied by 100 - presumably converting 100 m grid
# units to metres (the earlier comment said kilometres; confirm).
wealth <- wealth %>%
  mutate(VRLVIERKANT100M = gsub("(E)(\\d+)(N)(\\d+)","\\1\\200\\3\\400", VRLVIERKANT100M))

# Loop over each city (sigma = 500m)
# For every FUA: count households per grid cell and wealth percentile, then
# compute the spatial information theory index (slot `h` of the spseg() result)
# for each of the 99 binary splits "percentile <= p" versus "percentile > p",
# and write one CSV per city with columns Value (index) and Group (p).
n_percentiles <- 100L
thresholds <- seq_len(n_percentiles - 1L)

for (city in cities) {
  # Households of this FUA that have a percentile.
  # - ungroup(): a leftover FUA grouping on `wealth` must not leak into
  #   select()/count().
  # - !!city: forces the loop variable, even if the data has a `city` column.
  # - NA percentiles are dropped; they would otherwise become an "NA" column
  #   that is counted as "above" for every threshold.
  city_households <- wealth %>%
    ungroup() %>%
    filter(FUA == !!city, !is.na(Wealth_per_capita_percentile)) %>%
    select(VRLVIERKANT100M, Wealth_per_capita_percentile)

  # An unknown or empty FUA used to abort the whole run with an obscure
  # spatial error; skip it loudly instead so the remaining cities still run.
  if (nrow(city_households) == 0) {
    warning("No households found for FUA '", city, "' in 2019; skipping.", call. = FALSE)
    next
  }

  # Pivot table: one row per grid cell, one column per percentile present
  pivot <- city_households %>%
    count(VRLVIERKANT100M, Wealth_per_capita_percentile, name = "Population") %>%
    tidyr::pivot_wider(names_from = Wealth_per_capita_percentile, values_from = Population, values_fill = 0)

  # Parse the E/N coordinates out of the grid-cell code (same row order as pivot)
  grid_cells <- data.frame(
    x = as.numeric(sub("E", "", str_extract(pivot$VRLVIERKANT100M, "E\\d+"))),
    y = as.numeric(sub("N", "", str_extract(pivot$VRLVIERKANT100M, "N\\d+")))
  )

  # Build the sp points object that spseg() takes as `x` (EPSG:28992)
  Spatial_grid_cells <- st_as_sf(grid_cells, coords = c("x", "y"), crs = 28992)
  coords <- st_coordinates(Spatial_grid_cells)
  Spatial_grid_cells_sp <- SpatialPoints(coords, proj4string = CRS("+init=epsg:28992"))

  # Counts aligned by percentile label rather than by column position: a
  # percentile that does not occur in this city gets an all-zero column, so
  # column p is always percentile p and there are always 100 columns.
  percentile_counts <- matrix(
    0,
    nrow = nrow(pivot), ncol = n_percentiles,
    dimnames = list(NULL, as.character(seq_len(n_percentiles)))
  )
  present <- intersect(colnames(percentile_counts), colnames(pivot))
  percentile_counts[, present] <- as.matrix(pivot[, present])
  cell_totals <- rowSums(percentile_counts)

  # One result row per threshold p
  siti_by_threshold <- lapply(thresholds, function(p) {
    # Households at or below percentile p, and above it, per grid cell
    below <- rowSums(percentile_counts[, seq_len(p), drop = FALSE])
    above <- cell_totals - below

    # If one side of the split is empty in this city the index is undefined
    if (sum(below) == 0 || sum(above) == 0) {
      return(data.frame(Value = NA_real_, Group = as.character(p)))
    }

    split_counts <- data.frame(sumUpToCurrentPerc = below, sumAboveCurrentPerc = above)

    # Kernel-smoothed local environments with sigma = 500
    result <- spseg(
      x = Spatial_grid_cells_sp, data = split_counts,
      method = "information", smoothing = "kernel", sigma = 500, useC = FALSE
    )
    data.frame(Value = result@h, Group = as.character(p))
  })

  # Make them be in a single database, with Group ordered 1..99
  combined_data <- do.call(rbind, siti_by_threshold)
  combined_data$Group <- factor(combined_data$Group, levels = as.character(thresholds))

  # Save the data
  write.csv(combined_data, file = paste0("Wealth_per_capita_excluding_real_estate_segregation_500m_2019_", city, ".csv"), row.names = FALSE)
}

# Delete data for enabling further operations
rm(wealth)





### Calculate Spatial Information Theory Index for wealth segregation in 2018 ###

# Load the 2018 household data (always re-read from disk)
wealth <- read.csv("raw_data/database_2018.csv")

# Drop households whose total assets carry the missing-value code 99999999999.
# filter() also drops rows where the comparison is NA, whereas the base form
# `wealth[cond, ]` would turn every NA condition into an all-NA row.
wealth <- wealth %>%
  filter(VEHW1000VERH != 99999999999)

# Movable wealth = financial assets + entrepreneurial capital +
# significant interests + other assets (real estate is left out)
wealth$Movable_wealth <- wealth$VEHW1110FINH + wealth$VEHW1130ONDH +
  wealth$VEHW1140ABEH + wealth$VEHW1150OVEH

# Movable wealth per capita
# (INHAHL is presumably the number of household members - TODO confirm)
wealth$Movable_wealth_per_capita <- wealth$Movable_wealth / wealth$INHAHL

# Define all possible FUAs
cities <- c(
  "'s-Gravenhage", "'s-Hertogenbosch", "Alkmaar", "Almelo",
  "Alphen aan den Rijn", "Amersfoort", "Amsterdam", "Apeldoorn", "Arnhem",
  "Assen", "Bergen op Zoom", "Breda", "Deventer", "Ede", "Eindhoven",
  "Enschede", "Gouda", "Groningen", "Heerlen", "Leeuwarden", "Leiden",
  "Lelystad", "Maastricht", "Middelburg", "Nijmegen", "Oss", "Roosendaal",
  "Rotterdam", "Sittard-Geleen", "Soest", "Tilburg", "Utrecht", "Veenendaal",
  "Venlo", "Zwolle"
)

# Keep only households with a finite per-capita value. A zero or missing
# INHAHL, or a missing asset component, yields Inf/NaN/NA, which cannot be
# ranked meaningfully and would end up as an "NA" percentile downstream.
wealth <- wealth %>%
  filter(is.finite(Movable_wealth_per_capita))

# Percentile rank (1-100) of every household within its FUA.
# NOTE(review): the ranking previously used total Movable_wealth although the
# column and the output files are named "per capita" and the per-capita value
# was computed but never used. It now ranks on Movable_wealth_per_capita -
# confirm this is the intended measure.
# arrange() fixes the row order so ties are split reproducibly by ntile();
# ungroup() stops the FUA grouping from leaking into later select()/count().
wealth <- wealth %>%
  arrange(FUA, Movable_wealth_per_capita) %>%
  group_by(FUA) %>%
  mutate(Wealth_per_capita_percentile = ntile(Movable_wealth_per_capita, 100)) %>%
  ungroup()

# Drop households without a grid cell ("----------" marks a missing location)
wealth <- wealth %>%
  filter(VRLVIERKANT100M != "----------")

# Rescale the grid-cell coordinates: in the replacement, "\\200" is
# backreference 2 followed by a literal "00" (likewise "\\400"), so both the
# E and the N number are multiplied by 100 - presumably converting 100 m grid
# units to metres (the earlier comment said kilometres; confirm).
wealth <- wealth %>%
  mutate(VRLVIERKANT100M = gsub("(E)(\\d+)(N)(\\d+)","\\1\\200\\3\\400", VRLVIERKANT100M))

# Loop over each city (sigma = 500m)
# For every FUA: count households per grid cell and wealth percentile, then
# compute the spatial information theory index (slot `h` of the spseg() result)
# for each of the 99 binary splits "percentile <= p" versus "percentile > p",
# and write one CSV per city with columns Value (index) and Group (p).
n_percentiles <- 100L
thresholds <- seq_len(n_percentiles - 1L)

for (city in cities) {
  # Households of this FUA that have a percentile.
  # - ungroup(): a leftover FUA grouping on `wealth` must not leak into
  #   select()/count().
  # - !!city: forces the loop variable, even if the data has a `city` column.
  # - NA percentiles are dropped; they would otherwise become an "NA" column
  #   that is counted as "above" for every threshold.
  city_households <- wealth %>%
    ungroup() %>%
    filter(FUA == !!city, !is.na(Wealth_per_capita_percentile)) %>%
    select(VRLVIERKANT100M, Wealth_per_capita_percentile)

  # An unknown or empty FUA used to abort the whole run with an obscure
  # spatial error; skip it loudly instead so the remaining cities still run.
  if (nrow(city_households) == 0) {
    warning("No households found for FUA '", city, "' in 2018; skipping.", call. = FALSE)
    next
  }

  # Pivot table: one row per grid cell, one column per percentile present
  pivot <- city_households %>%
    count(VRLVIERKANT100M, Wealth_per_capita_percentile, name = "Population") %>%
    tidyr::pivot_wider(names_from = Wealth_per_capita_percentile, values_from = Population, values_fill = 0)

  # Parse the E/N coordinates out of the grid-cell code (same row order as pivot)
  grid_cells <- data.frame(
    x = as.numeric(sub("E", "", str_extract(pivot$VRLVIERKANT100M, "E\\d+"))),
    y = as.numeric(sub("N", "", str_extract(pivot$VRLVIERKANT100M, "N\\d+")))
  )

  # Build the sp points object that spseg() takes as `x` (EPSG:28992)
  Spatial_grid_cells <- st_as_sf(grid_cells, coords = c("x", "y"), crs = 28992)
  coords <- st_coordinates(Spatial_grid_cells)
  Spatial_grid_cells_sp <- SpatialPoints(coords, proj4string = CRS("+init=epsg:28992"))

  # Counts aligned by percentile label rather than by column position: a
  # percentile that does not occur in this city gets an all-zero column, so
  # column p is always percentile p and there are always 100 columns.
  percentile_counts <- matrix(
    0,
    nrow = nrow(pivot), ncol = n_percentiles,
    dimnames = list(NULL, as.character(seq_len(n_percentiles)))
  )
  present <- intersect(colnames(percentile_counts), colnames(pivot))
  percentile_counts[, present] <- as.matrix(pivot[, present])
  cell_totals <- rowSums(percentile_counts)

  # One result row per threshold p
  siti_by_threshold <- lapply(thresholds, function(p) {
    # Households at or below percentile p, and above it, per grid cell
    below <- rowSums(percentile_counts[, seq_len(p), drop = FALSE])
    above <- cell_totals - below

    # If one side of the split is empty in this city the index is undefined
    if (sum(below) == 0 || sum(above) == 0) {
      return(data.frame(Value = NA_real_, Group = as.character(p)))
    }

    split_counts <- data.frame(sumUpToCurrentPerc = below, sumAboveCurrentPerc = above)

    # Kernel-smoothed local environments with sigma = 500
    result <- spseg(
      x = Spatial_grid_cells_sp, data = split_counts,
      method = "information", smoothing = "kernel", sigma = 500, useC = FALSE
    )
    data.frame(Value = result@h, Group = as.character(p))
  })

  # Make them be in a single database, with Group ordered 1..99
  combined_data <- do.call(rbind, siti_by_threshold)
  combined_data$Group <- factor(combined_data$Group, levels = as.character(thresholds))

  # Save the data
  write.csv(combined_data, file = paste0("Wealth_per_capita_excluding_real_estate_segregation_500m_2018_", city, ".csv"), row.names = FALSE)
}

# Delete data for enabling further operations
rm(wealth)




### Calculate Spatial Information Theory Index for wealth segregation in 2017 ###

# Load the 2017 household data (always re-read from disk)
wealth <- read.csv("raw_data/database_2017.csv")

# Drop households whose total assets carry the missing-value code 99999999999.
# filter() also drops rows where the comparison is NA, whereas the base form
# `wealth[cond, ]` would turn every NA condition into an all-NA row.
wealth <- wealth %>%
  filter(VEHW1000VERH != 99999999999)

# Movable wealth = financial assets + entrepreneurial capital +
# significant interests + other assets (real estate is left out)
wealth$Movable_wealth <- wealth$VEHW1110FINH + wealth$VEHW1130ONDH +
  wealth$VEHW1140ABEH + wealth$VEHW1150OVEH

# Movable wealth per capita
# (INHAHL is presumably the number of household members - TODO confirm)
wealth$Movable_wealth_per_capita <- wealth$Movable_wealth / wealth$INHAHL

# Define all possible FUAs
cities <- c(
  "'s-Gravenhage", "'s-Hertogenbosch", "Alkmaar", "Almelo",
  "Alphen aan den Rijn", "Amersfoort", "Amsterdam", "Apeldoorn", "Arnhem",
  "Assen", "Bergen op Zoom", "Breda", "Deventer", "Ede", "Eindhoven",
  "Enschede", "Gouda", "Groningen", "Heerlen", "Leeuwarden", "Leiden",
  "Lelystad", "Maastricht", "Middelburg", "Nijmegen", "Oss", "Roosendaal",
  "Rotterdam", "Sittard-Geleen", "Soest", "Tilburg", "Utrecht", "Veenendaal",
  "Venlo", "Zwolle"
)

# Keep only households with a finite per-capita value. A zero or missing
# INHAHL, or a missing asset component, yields Inf/NaN/NA, which cannot be
# ranked meaningfully and would end up as an "NA" percentile downstream.
wealth <- wealth %>%
  filter(is.finite(Movable_wealth_per_capita))

# Percentile rank (1-100) of every household within its FUA.
# NOTE(review): the ranking previously used total Movable_wealth although the
# column and the output files are named "per capita" and the per-capita value
# was computed but never used. It now ranks on Movable_wealth_per_capita -
# confirm this is the intended measure.
# arrange() fixes the row order so ties are split reproducibly by ntile();
# ungroup() stops the FUA grouping from leaking into later select()/count().
wealth <- wealth %>%
  arrange(FUA, Movable_wealth_per_capita) %>%
  group_by(FUA) %>%
  mutate(Wealth_per_capita_percentile = ntile(Movable_wealth_per_capita, 100)) %>%
  ungroup()

# Drop households without a grid cell ("----------" marks a missing location)
wealth <- wealth %>%
  filter(VRLVIERKANT100M != "----------")

# Rescale the grid-cell coordinates: in the replacement, "\\200" is
# backreference 2 followed by a literal "00" (likewise "\\400"), so both the
# E and the N number are multiplied by 100 - presumably converting 100 m grid
# units to metres (the earlier comment said kilometres; confirm).
wealth <- wealth %>%
  mutate(VRLVIERKANT100M = gsub("(E)(\\d+)(N)(\\d+)","\\1\\200\\3\\400", VRLVIERKANT100M))

# Loop over each city (sigma = 500m): for every FUA, compute the spatial
# information theory index (SITI) at each of the 99 percentile thresholds and
# write the results to one CSV file per city.
for (city in cities) {
  # Name used below in the FUA filter and in the output file name
  City <- city

  # Households of this FUA, reduced to grid cell and wealth percentile.
  # ungroup() removes any grouping still attached to `wealth`, so select()
  # does not silently carry FUA along. Households without a percentile rank
  # cannot be placed on either side of a threshold and are dropped.
  df <- wealth %>%
    ungroup() %>%
    filter(FUA == City) %>%
    select(VRLVIERKANT100M, Wealth_per_capita_percentile) %>%
    filter(!is.na(Wealth_per_capita_percentile))

  # Skip an FUA without usable households instead of aborting the whole run
  if (nrow(df) == 0) {
    warning("No households found for FUA '", City, "'; skipping.", call. = FALSE)
    next
  }

  # Pivot table: one row per grid cell, one column per percentile, each cell
  # holding the number of households
  pivot <- df %>%
    group_by(VRLVIERKANT100M, Wealth_per_capita_percentile) %>%
    summarise(Population = n(), .groups = "drop") %>%
    tidyr::pivot_wider(names_from = Wealth_per_capita_percentile, values_from = Population, values_fill = 0)

  # Parse the coordinates of the grid cells out of the grid cell code
  grid_cells <- data.frame(
    x = as.numeric(sub("E", "", str_extract(pivot$VRLVIERKANT100M, "E\\d+"))),
    y = as.numeric(sub("N", "", str_extract(pivot$VRLVIERKANT100M, "N\\d+")))
  )

  # Create a spatial object for grid cells and the SpatialPoints object that
  # spseg() is called with
  Spatial_grid_cells <- st_as_sf(grid_cells, coords = c("x", "y"), crs = 28992)
  coords <- st_coordinates(Spatial_grid_cells)
  Spatial_grid_cells_sp <- SpatialPoints(coords, proj4string = CRS("+init=epsg:28992"))

  # Keep exactly the columns "1".."100" in numeric order. A percentile that
  # does not occur in this FUA gets a column of zeros, so column i always
  # belongs to percentile i and all 99 thresholds below exist.
  percentile_cols <- as.character(1:100)
  for (missing_col in setdiff(percentile_cols, colnames(pivot))) {
    pivot[[missing_col]] <- 0L
  }
  pivot <- pivot[, percentile_cols]
  counts <- as.matrix(pivot)

  # For every threshold i: households up to and including percentile i versus
  # households above it, per grid cell
  indices <- seq(1, 99)
  list_of_databases <- lapply(indices, function(i) {
    data.frame(
      sumUpToCurrentPerc = rowSums(counts[, seq_len(i), drop = FALSE], na.rm = TRUE),
      sumAboveCurrentPerc = rowSums(counts[, (i + 1):ncol(counts), drop = FALSE], na.rm = TRUE)
    )
  })

  # Calculate SITI for every threshold; results are kept in a named list
  # instead of 99 separate global variables
  SITI_results <- lapply(list_of_databases, function(threshold_data) {
    spseg(x = Spatial_grid_cells_sp, data = threshold_data, method = "information", smoothing = "kernel", sigma = 500, useC = FALSE)
  })
  names(SITI_results) <- paste0("SITI_result", indices)

  # Extract the `h` slot of every result and label it with its threshold
  combined_data_list <- Map(
    function(result, i) data.frame(Value = result@h, Group = as.character(i)),
    SITI_results,
    indices
  )

  # Make them be in a single database
  combined_data <- do.call(rbind, combined_data_list)

  # Convert the Group column to a factor with the thresholds in numeric order
  combined_data$Group <- factor(combined_data$Group, levels = as.character(indices))

  # Save the data
  write.csv(combined_data, file = paste0("Wealth_per_capita_excluding_real_estate_segregation_500m_2017_", City, ".csv"), row.names = FALSE)
}

# Delete data for enabling further operations
rm(wealth)




### Calculate Spatial Information Theory Index for wealth segregation in 2016 ###

# Load the data
wealth <- read.csv("raw_data/database_2016.csv") # only if not already loaded

# Delete missing values of wealth (total assets equal to 99999999999).
# which() also drops rows whose value is NA instead of turning them into
# all-NA rows, which plain logical indexing would do.
wealth <- wealth[which(wealth$VEHW1000VERH != 99999999999), ]

# Calculate movable wealth
wealth$Movable_wealth <- wealth$VEHW1110FINH + wealth$VEHW1130ONDH + wealth$VEHW1140ABEH + wealth$VEHW1150OVEH

# Calculate movable wealth per capita
wealth$Movable_wealth_per_capita <- (wealth$Movable_wealth/wealth$INHAHL)

# Define all possible FUAs (functional urban areas) that are analysed
cities <- c(
  "'s-Gravenhage", "'s-Hertogenbosch", "Alkmaar", "Almelo",
  "Alphen aan den Rijn", "Amersfoort", "Amsterdam", "Apeldoorn", "Arnhem",
  "Assen", "Bergen op Zoom", "Breda", "Deventer", "Ede", "Eindhoven",
  "Enschede", "Gouda", "Groningen", "Heerlen", "Leeuwarden", "Leiden",
  "Lelystad", "Maastricht", "Middelburg", "Nijmegen", "Oss", "Roosendaal",
  "Rotterdam", "Sittard-Geleen", "Soest", "Tilburg", "Utrecht", "Veenendaal",
  "Venlo", "Zwolle"
)

# Calculate the wealth percentile rank every household belongs to in their FUA.
# Rows are sorted first so that ties are broken in a reproducible order, and
# the grouping is removed again afterwards so it does not leak into the verbs
# used further down.
# NOTE(review): the rank is computed on household-level Movable_wealth, while
# the column (and the output files) are named "per capita" and
# Movable_wealth_per_capita is not used in this section -- confirm which
# measure is intended.
wealth <- wealth %>%
  arrange(FUA, Movable_wealth) %>%
  group_by(FUA) %>%
  mutate(Wealth_per_capita_percentile = ntile(Movable_wealth, 100)) %>%
  ungroup()

# Clean rows of missing data (grid cell code "----------"), then rescale the
# coordinates: two zeros are appended to both the E and the N part of the
# code, i.e. both values are multiplied by 100 (presumably from units of 100 m
# to metres -- confirm against the grid definition)
wealth <- wealth %>%
  filter(VRLVIERKANT100M != "----------") %>%
  mutate(VRLVIERKANT100M = gsub("(E)(\\d+)(N)(\\d+)","\\1\\200\\3\\400", VRLVIERKANT100M))

# Loop over each city (sigma = 500m): for every FUA, compute the spatial
# information theory index (SITI) at each of the 99 percentile thresholds and
# write the results to one CSV file per city.
for (city in cities) {
  # Name used below in the FUA filter and in the output file name
  City <- city

  # Households of this FUA, reduced to grid cell and wealth percentile.
  # ungroup() removes any grouping still attached to `wealth`, so select()
  # does not silently carry FUA along. Households without a percentile rank
  # cannot be placed on either side of a threshold and are dropped.
  df <- wealth %>%
    ungroup() %>%
    filter(FUA == City) %>%
    select(VRLVIERKANT100M, Wealth_per_capita_percentile) %>%
    filter(!is.na(Wealth_per_capita_percentile))

  # Skip an FUA without usable households instead of aborting the whole run
  if (nrow(df) == 0) {
    warning("No households found for FUA '", City, "'; skipping.", call. = FALSE)
    next
  }

  # Pivot table: one row per grid cell, one column per percentile, each cell
  # holding the number of households
  pivot <- df %>%
    group_by(VRLVIERKANT100M, Wealth_per_capita_percentile) %>%
    summarise(Population = n(), .groups = "drop") %>%
    tidyr::pivot_wider(names_from = Wealth_per_capita_percentile, values_from = Population, values_fill = 0)

  # Parse the coordinates of the grid cells out of the grid cell code
  grid_cells <- data.frame(
    x = as.numeric(sub("E", "", str_extract(pivot$VRLVIERKANT100M, "E\\d+"))),
    y = as.numeric(sub("N", "", str_extract(pivot$VRLVIERKANT100M, "N\\d+")))
  )

  # Create a spatial object for grid cells and the SpatialPoints object that
  # spseg() is called with
  Spatial_grid_cells <- st_as_sf(grid_cells, coords = c("x", "y"), crs = 28992)
  coords <- st_coordinates(Spatial_grid_cells)
  Spatial_grid_cells_sp <- SpatialPoints(coords, proj4string = CRS("+init=epsg:28992"))

  # Keep exactly the columns "1".."100" in numeric order. A percentile that
  # does not occur in this FUA gets a column of zeros, so column i always
  # belongs to percentile i and all 99 thresholds below exist.
  percentile_cols <- as.character(1:100)
  for (missing_col in setdiff(percentile_cols, colnames(pivot))) {
    pivot[[missing_col]] <- 0L
  }
  pivot <- pivot[, percentile_cols]
  counts <- as.matrix(pivot)

  # For every threshold i: households up to and including percentile i versus
  # households above it, per grid cell
  indices <- seq(1, 99)
  list_of_databases <- lapply(indices, function(i) {
    data.frame(
      sumUpToCurrentPerc = rowSums(counts[, seq_len(i), drop = FALSE], na.rm = TRUE),
      sumAboveCurrentPerc = rowSums(counts[, (i + 1):ncol(counts), drop = FALSE], na.rm = TRUE)
    )
  })

  # Calculate SITI for every threshold; results are kept in a named list
  # instead of 99 separate global variables
  SITI_results <- lapply(list_of_databases, function(threshold_data) {
    spseg(x = Spatial_grid_cells_sp, data = threshold_data, method = "information", smoothing = "kernel", sigma = 500, useC = FALSE)
  })
  names(SITI_results) <- paste0("SITI_result", indices)

  # Extract the `h` slot of every result and label it with its threshold
  combined_data_list <- Map(
    function(result, i) data.frame(Value = result@h, Group = as.character(i)),
    SITI_results,
    indices
  )

  # Make them be in a single database
  combined_data <- do.call(rbind, combined_data_list)

  # Convert the Group column to a factor with the thresholds in numeric order
  combined_data$Group <- factor(combined_data$Group, levels = as.character(indices))

  # Save the data
  write.csv(combined_data, file = paste0("Wealth_per_capita_excluding_real_estate_segregation_500m_2016_", City, ".csv"), row.names = FALSE)
}

# Delete data for enabling further operations
rm(wealth)


### Calculate Spatial Information Theory Index for wealth segregation in 2015 ###

# Load the data
wealth <- read.csv("raw_data/database_2015.csv") # only if not already loaded

# Delete missing values of wealth (total assets equal to 99999999999).
# which() also drops rows whose value is NA instead of turning them into
# all-NA rows, which plain logical indexing would do.
wealth <- wealth[which(wealth$VEHW1000VERH != 99999999999), ]

# Calculate movable wealth
wealth$Movable_wealth <- wealth$VEHW1110FINH + wealth$VEHW1130ONDH + wealth$VEHW1140ABEH + wealth$VEHW1150OVEH

# Calculate movable wealth per capita
wealth$Movable_wealth_per_capita <- (wealth$Movable_wealth/wealth$INHAHL)

# Define all possible FUAs (functional urban areas) that are analysed
cities <- c(
  "'s-Gravenhage", "'s-Hertogenbosch", "Alkmaar", "Almelo",
  "Alphen aan den Rijn", "Amersfoort", "Amsterdam", "Apeldoorn", "Arnhem",
  "Assen", "Bergen op Zoom", "Breda", "Deventer", "Ede", "Eindhoven",
  "Enschede", "Gouda", "Groningen", "Heerlen", "Leeuwarden", "Leiden",
  "Lelystad", "Maastricht", "Middelburg", "Nijmegen", "Oss", "Roosendaal",
  "Rotterdam", "Sittard-Geleen", "Soest", "Tilburg", "Utrecht", "Veenendaal",
  "Venlo", "Zwolle"
)

# Calculate the wealth percentile rank every household belongs to in their FUA.
# Rows are sorted first so that ties are broken in a reproducible order, and
# the grouping is removed again afterwards so it does not leak into the verbs
# used further down.
# NOTE(review): the rank is computed on household-level Movable_wealth, while
# the column (and the output files) are named "per capita" and
# Movable_wealth_per_capita is not used in this section -- confirm which
# measure is intended.
wealth <- wealth %>%
  arrange(FUA, Movable_wealth) %>%
  group_by(FUA) %>%
  mutate(Wealth_per_capita_percentile = ntile(Movable_wealth, 100)) %>%
  ungroup()

# Clean rows of missing data (grid cell code "----------"), then rescale the
# coordinates: two zeros are appended to both the E and the N part of the
# code, i.e. both values are multiplied by 100 (presumably from units of 100 m
# to metres -- confirm against the grid definition)
wealth <- wealth %>%
  filter(VRLVIERKANT100M != "----------") %>%
  mutate(VRLVIERKANT100M = gsub("(E)(\\d+)(N)(\\d+)","\\1\\200\\3\\400", VRLVIERKANT100M))

# Loop over each city (sigma = 500m): for every FUA, compute the spatial
# information theory index (SITI) at each of the 99 percentile thresholds and
# write the results to one CSV file per city.
for (city in cities) {
  # Name used below in the FUA filter and in the output file name
  City <- city

  # Households of this FUA, reduced to grid cell and wealth percentile.
  # ungroup() removes any grouping still attached to `wealth`, so select()
  # does not silently carry FUA along. Households without a percentile rank
  # cannot be placed on either side of a threshold and are dropped.
  df <- wealth %>%
    ungroup() %>%
    filter(FUA == City) %>%
    select(VRLVIERKANT100M, Wealth_per_capita_percentile) %>%
    filter(!is.na(Wealth_per_capita_percentile))

  # Skip an FUA without usable households instead of aborting the whole run
  if (nrow(df) == 0) {
    warning("No households found for FUA '", City, "'; skipping.", call. = FALSE)
    next
  }

  # Pivot table: one row per grid cell, one column per percentile, each cell
  # holding the number of households
  pivot <- df %>%
    group_by(VRLVIERKANT100M, Wealth_per_capita_percentile) %>%
    summarise(Population = n(), .groups = "drop") %>%
    tidyr::pivot_wider(names_from = Wealth_per_capita_percentile, values_from = Population, values_fill = 0)

  # Parse the coordinates of the grid cells out of the grid cell code
  grid_cells <- data.frame(
    x = as.numeric(sub("E", "", str_extract(pivot$VRLVIERKANT100M, "E\\d+"))),
    y = as.numeric(sub("N", "", str_extract(pivot$VRLVIERKANT100M, "N\\d+")))
  )

  # Create a spatial object for grid cells and the SpatialPoints object that
  # spseg() is called with
  Spatial_grid_cells <- st_as_sf(grid_cells, coords = c("x", "y"), crs = 28992)
  coords <- st_coordinates(Spatial_grid_cells)
  Spatial_grid_cells_sp <- SpatialPoints(coords, proj4string = CRS("+init=epsg:28992"))

  # Keep exactly the columns "1".."100" in numeric order. A percentile that
  # does not occur in this FUA gets a column of zeros, so column i always
  # belongs to percentile i and all 99 thresholds below exist.
  percentile_cols <- as.character(1:100)
  for (missing_col in setdiff(percentile_cols, colnames(pivot))) {
    pivot[[missing_col]] <- 0L
  }
  pivot <- pivot[, percentile_cols]
  counts <- as.matrix(pivot)

  # For every threshold i: households up to and including percentile i versus
  # households above it, per grid cell
  indices <- seq(1, 99)
  list_of_databases <- lapply(indices, function(i) {
    data.frame(
      sumUpToCurrentPerc = rowSums(counts[, seq_len(i), drop = FALSE], na.rm = TRUE),
      sumAboveCurrentPerc = rowSums(counts[, (i + 1):ncol(counts), drop = FALSE], na.rm = TRUE)
    )
  })

  # Calculate SITI for every threshold; results are kept in a named list
  # instead of 99 separate global variables
  SITI_results <- lapply(list_of_databases, function(threshold_data) {
    spseg(x = Spatial_grid_cells_sp, data = threshold_data, method = "information", smoothing = "kernel", sigma = 500, useC = FALSE)
  })
  names(SITI_results) <- paste0("SITI_result", indices)

  # Extract the `h` slot of every result and label it with its threshold
  combined_data_list <- Map(
    function(result, i) data.frame(Value = result@h, Group = as.character(i)),
    SITI_results,
    indices
  )

  # Make them be in a single database
  combined_data <- do.call(rbind, combined_data_list)

  # Convert the Group column to a factor with the thresholds in numeric order
  combined_data$Group <- factor(combined_data$Group, levels = as.character(indices))

  # Save the data
  write.csv(combined_data, file = paste0("Wealth_per_capita_excluding_real_estate_segregation_500m_2015_", City, ".csv"), row.names = FALSE)
}

# Delete data for enabling further operations
rm(wealth)



### Calculate Spatial Information Theory Index for wealth segregation in 2014 ###

# Load the data
wealth <- read.csv("raw_data/database_2014.csv") # only if not already loaded

# Delete missing values of wealth (total assets equal to 99999999999).
# which() also drops rows whose value is NA instead of turning them into
# all-NA rows, which plain logical indexing would do.
wealth <- wealth[which(wealth$VEHW1000VERH != 99999999999), ]

# Calculate movable wealth
wealth$Movable_wealth <- wealth$VEHW1110FINH + wealth$VEHW1130ONDH + wealth$VEHW1140ABEH + wealth$VEHW1150OVEH

# Calculate movable wealth per capita
wealth$Movable_wealth_per_capita <- (wealth$Movable_wealth/wealth$INHAHL)

# Define all possible FUAs (functional urban areas) that are analysed
cities <- c(
  "'s-Gravenhage", "'s-Hertogenbosch", "Alkmaar", "Almelo",
  "Alphen aan den Rijn", "Amersfoort", "Amsterdam", "Apeldoorn", "Arnhem",
  "Assen", "Bergen op Zoom", "Breda", "Deventer", "Ede", "Eindhoven",
  "Enschede", "Gouda", "Groningen", "Heerlen", "Leeuwarden", "Leiden",
  "Lelystad", "Maastricht", "Middelburg", "Nijmegen", "Oss", "Roosendaal",
  "Rotterdam", "Sittard-Geleen", "Soest", "Tilburg", "Utrecht", "Veenendaal",
  "Venlo", "Zwolle"
)

# Calculate the wealth percentile rank every household belongs to in their FUA.
# Rows are sorted first so that ties are broken in a reproducible order, and
# the grouping is removed again afterwards so it does not leak into the verbs
# used further down.
# NOTE(review): the rank is computed on household-level Movable_wealth, while
# the column (and the output files) are named "per capita" and
# Movable_wealth_per_capita is not used in this section -- confirm which
# measure is intended.
wealth <- wealth %>%
  arrange(FUA, Movable_wealth) %>%
  group_by(FUA) %>%
  mutate(Wealth_per_capita_percentile = ntile(Movable_wealth, 100)) %>%
  ungroup()

# Clean rows of missing data (grid cell code "----------"), then rescale the
# coordinates: two zeros are appended to both the E and the N part of the
# code, i.e. both values are multiplied by 100 (presumably from units of 100 m
# to metres -- confirm against the grid definition)
wealth <- wealth %>%
  filter(VRLVIERKANT100M != "----------") %>%
  mutate(VRLVIERKANT100M = gsub("(E)(\\d+)(N)(\\d+)","\\1\\200\\3\\400", VRLVIERKANT100M))

# Loop over each city (sigma = 500m): for every FUA, compute the spatial
# information theory index (SITI) at each of the 99 percentile thresholds and
# write the results to one CSV file per city.
for (city in cities) {
  # Name used below in the FUA filter and in the output file name
  City <- city

  # Households of this FUA, reduced to grid cell and wealth percentile.
  # ungroup() removes any grouping still attached to `wealth`, so select()
  # does not silently carry FUA along. Households without a percentile rank
  # cannot be placed on either side of a threshold and are dropped.
  df <- wealth %>%
    ungroup() %>%
    filter(FUA == City) %>%
    select(VRLVIERKANT100M, Wealth_per_capita_percentile) %>%
    filter(!is.na(Wealth_per_capita_percentile))

  # Skip an FUA without usable households instead of aborting the whole run
  if (nrow(df) == 0) {
    warning("No households found for FUA '", City, "'; skipping.", call. = FALSE)
    next
  }

  # Pivot table: one row per grid cell, one column per percentile, each cell
  # holding the number of households
  pivot <- df %>%
    group_by(VRLVIERKANT100M, Wealth_per_capita_percentile) %>%
    summarise(Population = n(), .groups = "drop") %>%
    tidyr::pivot_wider(names_from = Wealth_per_capita_percentile, values_from = Population, values_fill = 0)

  # Parse the coordinates of the grid cells out of the grid cell code
  grid_cells <- data.frame(
    x = as.numeric(sub("E", "", str_extract(pivot$VRLVIERKANT100M, "E\\d+"))),
    y = as.numeric(sub("N", "", str_extract(pivot$VRLVIERKANT100M, "N\\d+")))
  )

  # Create a spatial object for grid cells and the SpatialPoints object that
  # spseg() is called with
  Spatial_grid_cells <- st_as_sf(grid_cells, coords = c("x", "y"), crs = 28992)
  coords <- st_coordinates(Spatial_grid_cells)
  Spatial_grid_cells_sp <- SpatialPoints(coords, proj4string = CRS("+init=epsg:28992"))

  # Keep exactly the columns "1".."100" in numeric order. A percentile that
  # does not occur in this FUA gets a column of zeros, so column i always
  # belongs to percentile i and all 99 thresholds below exist.
  percentile_cols <- as.character(1:100)
  for (missing_col in setdiff(percentile_cols, colnames(pivot))) {
    pivot[[missing_col]] <- 0L
  }
  pivot <- pivot[, percentile_cols]
  counts <- as.matrix(pivot)

  # For every threshold i: households up to and including percentile i versus
  # households above it, per grid cell
  indices <- seq(1, 99)
  list_of_databases <- lapply(indices, function(i) {
    data.frame(
      sumUpToCurrentPerc = rowSums(counts[, seq_len(i), drop = FALSE], na.rm = TRUE),
      sumAboveCurrentPerc = rowSums(counts[, (i + 1):ncol(counts), drop = FALSE], na.rm = TRUE)
    )
  })

  # Calculate SITI for every threshold; results are kept in a named list
  # instead of 99 separate global variables
  SITI_results <- lapply(list_of_databases, function(threshold_data) {
    spseg(x = Spatial_grid_cells_sp, data = threshold_data, method = "information", smoothing = "kernel", sigma = 500, useC = FALSE)
  })
  names(SITI_results) <- paste0("SITI_result", indices)

  # Extract the `h` slot of every result and label it with its threshold
  combined_data_list <- Map(
    function(result, i) data.frame(Value = result@h, Group = as.character(i)),
    SITI_results,
    indices
  )

  # Make them be in a single database
  combined_data <- do.call(rbind, combined_data_list)

  # Convert the Group column to a factor with the thresholds in numeric order
  combined_data$Group <- factor(combined_data$Group, levels = as.character(indices))

  # Save the data
  write.csv(combined_data, file = paste0("Wealth_per_capita_excluding_real_estate_segregation_500m_2014_", City, ".csv"), row.names = FALSE)
}

# Delete data for enabling further operations
rm(wealth)




### Calculate Spatial Information Theory Index for wealth segregation in 2013 ###

# Load the data
wealth <- read.csv("raw_data/database_2013.csv") # only if not already loaded

# Delete missing values of wealth (total assets equal to 99999999999).
# which() also drops rows whose value is NA instead of turning them into
# all-NA rows, which plain logical indexing would do.
wealth <- wealth[which(wealth$VEHW1000VERH != 99999999999), ]

# Calculate movable wealth
wealth$Movable_wealth <- wealth$VEHW1110FINH + wealth$VEHW1130ONDH + wealth$VEHW1140ABEH + wealth$VEHW1150OVEH

# Calculate movable wealth per capita
wealth$Movable_wealth_per_capita <- (wealth$Movable_wealth/wealth$INHAHL)

# Define all possible FUAs (functional urban areas) that are analysed
cities <- c(
  "'s-Gravenhage", "'s-Hertogenbosch", "Alkmaar", "Almelo",
  "Alphen aan den Rijn", "Amersfoort", "Amsterdam", "Apeldoorn", "Arnhem",
  "Assen", "Bergen op Zoom", "Breda", "Deventer", "Ede", "Eindhoven",
  "Enschede", "Gouda", "Groningen", "Heerlen", "Leeuwarden", "Leiden",
  "Lelystad", "Maastricht", "Middelburg", "Nijmegen", "Oss", "Roosendaal",
  "Rotterdam", "Sittard-Geleen", "Soest", "Tilburg", "Utrecht", "Veenendaal",
  "Venlo", "Zwolle"
)

# Calculate the wealth percentile rank every household belongs to in their FUA.
# Rows are sorted first so that ties are broken in a reproducible order, and
# the grouping is removed again afterwards so it does not leak into the verbs
# used further down.
# NOTE(review): the rank is computed on household-level Movable_wealth, while
# the column (and the output files) are named "per capita" and
# Movable_wealth_per_capita is not used in this section -- confirm which
# measure is intended.
wealth <- wealth %>%
  arrange(FUA, Movable_wealth) %>%
  group_by(FUA) %>%
  mutate(Wealth_per_capita_percentile = ntile(Movable_wealth, 100)) %>%
  ungroup()

# Clean rows of missing data (grid cell code "----------"), then rescale the
# coordinates: two zeros are appended to both the E and the N part of the
# code, i.e. both values are multiplied by 100 (presumably from units of 100 m
# to metres -- confirm against the grid definition)
wealth <- wealth %>%
  filter(VRLVIERKANT100M != "----------") %>%
  mutate(VRLVIERKANT100M = gsub("(E)(\\d+)(N)(\\d+)","\\1\\200\\3\\400", VRLVIERKANT100M))

# Loop over each city (sigma = 500m): for every FUA, compute the spatial
# information theory index (SITI) at each of the 99 percentile thresholds and
# write the results to one CSV file per city.
for (city in cities) {
  # Name used below in the FUA filter and in the output file name
  City <- city

  # Households of this FUA, reduced to grid cell and wealth percentile.
  # ungroup() removes any grouping still attached to `wealth`, so select()
  # does not silently carry FUA along. Households without a percentile rank
  # cannot be placed on either side of a threshold and are dropped.
  df <- wealth %>%
    ungroup() %>%
    filter(FUA == City) %>%
    select(VRLVIERKANT100M, Wealth_per_capita_percentile) %>%
    filter(!is.na(Wealth_per_capita_percentile))

  # Skip an FUA without usable households instead of aborting the whole run
  if (nrow(df) == 0) {
    warning("No households found for FUA '", City, "'; skipping.", call. = FALSE)
    next
  }

  # Pivot table: one row per grid cell, one column per percentile, each cell
  # holding the number of households
  pivot <- df %>%
    group_by(VRLVIERKANT100M, Wealth_per_capita_percentile) %>%
    summarise(Population = n(), .groups = "drop") %>%
    tidyr::pivot_wider(names_from = Wealth_per_capita_percentile, values_from = Population, values_fill = 0)

  # Parse the coordinates of the grid cells out of the grid cell code
  grid_cells <- data.frame(
    x = as.numeric(sub("E", "", str_extract(pivot$VRLVIERKANT100M, "E\\d+"))),
    y = as.numeric(sub("N", "", str_extract(pivot$VRLVIERKANT100M, "N\\d+")))
  )

  # Create a spatial object for grid cells and the SpatialPoints object that
  # spseg() is called with
  Spatial_grid_cells <- st_as_sf(grid_cells, coords = c("x", "y"), crs = 28992)
  coords <- st_coordinates(Spatial_grid_cells)
  Spatial_grid_cells_sp <- SpatialPoints(coords, proj4string = CRS("+init=epsg:28992"))

  # Keep exactly the columns "1".."100" in numeric order. A percentile that
  # does not occur in this FUA gets a column of zeros, so column i always
  # belongs to percentile i and all 99 thresholds below exist.
  percentile_cols <- as.character(1:100)
  for (missing_col in setdiff(percentile_cols, colnames(pivot))) {
    pivot[[missing_col]] <- 0L
  }
  pivot <- pivot[, percentile_cols]
  counts <- as.matrix(pivot)

  # For every threshold i: households up to and including percentile i versus
  # households above it, per grid cell
  indices <- seq(1, 99)
  list_of_databases <- lapply(indices, function(i) {
    data.frame(
      sumUpToCurrentPerc = rowSums(counts[, seq_len(i), drop = FALSE], na.rm = TRUE),
      sumAboveCurrentPerc = rowSums(counts[, (i + 1):ncol(counts), drop = FALSE], na.rm = TRUE)
    )
  })

  # Calculate SITI for every threshold; results are kept in a named list
  # instead of 99 separate global variables
  SITI_results <- lapply(list_of_databases, function(threshold_data) {
    spseg(x = Spatial_grid_cells_sp, data = threshold_data, method = "information", smoothing = "kernel", sigma = 500, useC = FALSE)
  })
  names(SITI_results) <- paste0("SITI_result", indices)

  # Extract the `h` slot of every result and label it with its threshold
  combined_data_list <- Map(
    function(result, i) data.frame(Value = result@h, Group = as.character(i)),
    SITI_results,
    indices
  )

  # Make them be in a single database
  combined_data <- do.call(rbind, combined_data_list)

  # Convert the Group column to a factor with the thresholds in numeric order
  combined_data$Group <- factor(combined_data$Group, levels = as.character(indices))

  # Save the data
  write.csv(combined_data, file = paste0("Wealth_per_capita_excluding_real_estate_segregation_500m_2013_", City, ".csv"), row.names = FALSE)
}

# Delete data for enabling further operations
rm(wealth)








### Calculate Spatial Information Theory Index for wealth segregation in 2012 ###

# Load the data
wealth <- read.csv("raw_data/database_2012.csv") # only if not already loaded

# Delete missing values of wealth (total assets equal to 99999999999).
# which() also drops rows whose value is NA instead of turning them into
# all-NA rows, which plain logical indexing would do.
wealth <- wealth[which(wealth$VEHW1000VERH != 99999999999), ]

# Calculate movable wealth
wealth$Movable_wealth <- wealth$VEHW1110FINH + wealth$VEHW1130ONDH + wealth$VEHW1140ABEH + wealth$VEHW1150OVEH

# Calculate movable wealth per capita
wealth$Movable_wealth_per_capita <- (wealth$Movable_wealth/wealth$INHAHL)

# Define all possible FUAs (functional urban areas) that are analysed
cities <- c(
  "'s-Gravenhage", "'s-Hertogenbosch", "Alkmaar", "Almelo",
  "Alphen aan den Rijn", "Amersfoort", "Amsterdam", "Apeldoorn", "Arnhem",
  "Assen", "Bergen op Zoom", "Breda", "Deventer", "Ede", "Eindhoven",
  "Enschede", "Gouda", "Groningen", "Heerlen", "Leeuwarden", "Leiden",
  "Lelystad", "Maastricht", "Middelburg", "Nijmegen", "Oss", "Roosendaal",
  "Rotterdam", "Sittard-Geleen", "Soest", "Tilburg", "Utrecht", "Veenendaal",
  "Venlo", "Zwolle"
)

# Calculate the wealth percentile rank every household belongs to in their FUA.
# Rows are sorted first so that ties are broken in a reproducible order, and
# the grouping is removed again afterwards so it does not leak into the verbs
# used further down.
# NOTE(review): the rank is computed on household-level Movable_wealth, while
# the column (and the output files) are named "per capita" and
# Movable_wealth_per_capita is not used in this section -- confirm which
# measure is intended.
wealth <- wealth %>%
  arrange(FUA, Movable_wealth) %>%
  group_by(FUA) %>%
  mutate(Wealth_per_capita_percentile = ntile(Movable_wealth, 100)) %>%
  ungroup()

# Clean rows of missing data (grid cell code "----------"), then rescale the
# coordinates: two zeros are appended to both the E and the N part of the
# code, i.e. both values are multiplied by 100 (presumably from units of 100 m
# to metres -- confirm against the grid definition)
wealth <- wealth %>%
  filter(VRLVIERKANT100M != "----------") %>%
  mutate(VRLVIERKANT100M = gsub("(E)(\\d+)(N)(\\d+)","\\1\\200\\3\\400", VRLVIERKANT100M))

# Loop over each city (sigma = 500m): for every FUA, compute the spatial
# information theory index (SITI) at each of the 99 percentile thresholds and
# write the results to one CSV file per city.
for (city in cities) {
  # Name used below in the FUA filter and in the output file name
  City <- city

  # Households of this FUA, reduced to grid cell and wealth percentile.
  # ungroup() removes any grouping still attached to `wealth`, so select()
  # does not silently carry FUA along. Households without a percentile rank
  # cannot be placed on either side of a threshold and are dropped.
  df <- wealth %>%
    ungroup() %>%
    filter(FUA == City) %>%
    select(VRLVIERKANT100M, Wealth_per_capita_percentile) %>%
    filter(!is.na(Wealth_per_capita_percentile))

  # Skip an FUA without usable households instead of aborting the whole run
  if (nrow(df) == 0) {
    warning("No households found for FUA '", City, "'; skipping.", call. = FALSE)
    next
  }

  # Pivot table: one row per grid cell, one column per percentile, each cell
  # holding the number of households
  pivot <- df %>%
    group_by(VRLVIERKANT100M, Wealth_per_capita_percentile) %>%
    summarise(Population = n(), .groups = "drop") %>%
    tidyr::pivot_wider(names_from = Wealth_per_capita_percentile, values_from = Population, values_fill = 0)

  # Parse the coordinates of the grid cells out of the grid cell code
  grid_cells <- data.frame(
    x = as.numeric(sub("E", "", str_extract(pivot$VRLVIERKANT100M, "E\\d+"))),
    y = as.numeric(sub("N", "", str_extract(pivot$VRLVIERKANT100M, "N\\d+")))
  )

  # Create a spatial object for grid cells and the SpatialPoints object that
  # spseg() is called with
  Spatial_grid_cells <- st_as_sf(grid_cells, coords = c("x", "y"), crs = 28992)
  coords <- st_coordinates(Spatial_grid_cells)
  Spatial_grid_cells_sp <- SpatialPoints(coords, proj4string = CRS("+init=epsg:28992"))

  # Keep exactly the columns "1".."100" in numeric order. A percentile that
  # does not occur in this FUA gets a column of zeros, so column i always
  # belongs to percentile i and all 99 thresholds below exist.
  percentile_cols <- as.character(1:100)
  for (missing_col in setdiff(percentile_cols, colnames(pivot))) {
    pivot[[missing_col]] <- 0L
  }
  pivot <- pivot[, percentile_cols]
  counts <- as.matrix(pivot)

  # For every threshold i: households up to and including percentile i versus
  # households above it, per grid cell
  indices <- seq(1, 99)
  list_of_databases <- lapply(indices, function(i) {
    data.frame(
      sumUpToCurrentPerc = rowSums(counts[, seq_len(i), drop = FALSE], na.rm = TRUE),
      sumAboveCurrentPerc = rowSums(counts[, (i + 1):ncol(counts), drop = FALSE], na.rm = TRUE)
    )
  })

  # Calculate SITI for every threshold; results are kept in a named list
  # instead of 99 separate global variables
  SITI_results <- lapply(list_of_databases, function(threshold_data) {
    spseg(x = Spatial_grid_cells_sp, data = threshold_data, method = "information", smoothing = "kernel", sigma = 500, useC = FALSE)
  })
  names(SITI_results) <- paste0("SITI_result", indices)

  # Extract the `h` slot of every result and label it with its threshold
  combined_data_list <- Map(
    function(result, i) data.frame(Value = result@h, Group = as.character(i)),
    SITI_results,
    indices
  )

  # Make them be in a single database
  combined_data <- do.call(rbind, combined_data_list)

  # Convert the Group column to a factor with the thresholds in numeric order
  combined_data$Group <- factor(combined_data$Group, levels = as.character(indices))

  # Save the data
  write.csv(combined_data, file = paste0("Wealth_per_capita_excluding_real_estate_segregation_500m_2012_", City, ".csv"), row.names = FALSE)
}

# Delete data for enabling further operations
rm(wealth)


### Calculate Spatial Information Theory Index for wealth segregation in 2011 ###

# Load the data
wealth <- read.csv("raw_data/database_2011.csv") # only if not already loaded

# Delete missing values of wealth (total assets equal to 99999999999).
# which() also drops rows whose value is NA instead of turning them into
# all-NA rows, which plain logical indexing would do.
wealth <- wealth[which(wealth$VEHW1000VERH != 99999999999), ]

# Calculate movable wealth
wealth$Movable_wealth <- wealth$VEHW1110FINH + wealth$VEHW1130ONDH + wealth$VEHW1140ABEH + wealth$VEHW1150OVEH

# Calculate movable wealth per capita
wealth$Movable_wealth_per_capita <- (wealth$Movable_wealth/wealth$INHAHL)

# Define all possible FUAs (functional urban areas) that are analysed
cities <- c(
  "'s-Gravenhage", "'s-Hertogenbosch", "Alkmaar", "Almelo",
  "Alphen aan den Rijn", "Amersfoort", "Amsterdam", "Apeldoorn", "Arnhem",
  "Assen", "Bergen op Zoom", "Breda", "Deventer", "Ede", "Eindhoven",
  "Enschede", "Gouda", "Groningen", "Heerlen", "Leeuwarden", "Leiden",
  "Lelystad", "Maastricht", "Middelburg", "Nijmegen", "Oss", "Roosendaal",
  "Rotterdam", "Sittard-Geleen", "Soest", "Tilburg", "Utrecht", "Veenendaal",
  "Venlo", "Zwolle"
)

# Calculate the wealth percentile rank every household belongs to in their FUA.
# Rows are sorted first so that ties are broken in a reproducible order, and
# the grouping is removed again afterwards so it does not leak into the verbs
# used further down.
# NOTE(review): the rank is computed on household-level Movable_wealth, while
# the column (and the output files) are named "per capita" and
# Movable_wealth_per_capita is not used in this section -- confirm which
# measure is intended.
wealth <- wealth %>%
  arrange(FUA, Movable_wealth) %>%
  group_by(FUA) %>%
  mutate(Wealth_per_capita_percentile = ntile(Movable_wealth, 100)) %>%
  ungroup()

# Clean rows of missing data (grid cell code "----------"), then rescale the
# coordinates: two zeros are appended to both the E and the N part of the
# code, i.e. both values are multiplied by 100 (presumably from units of 100 m
# to metres -- confirm against the grid definition)
wealth <- wealth %>%
  filter(VRLVIERKANT100M != "----------") %>%
  mutate(VRLVIERKANT100M = gsub("(E)(\\d+)(N)(\\d+)","\\1\\200\\3\\400", VRLVIERKANT100M))

# Loop over each city (sigma = 500m)
for (city in cities) {
  # The output file name at the end of the loop is built from `City`
  City <- city

  # Households per grid cell and percentile for this FUA, spread so that each
  # row is a grid cell and each column a percentile (0 where a cell has no
  # household in that percentile). ungroup() makes the step independent of any
  # grouping still attached to `wealth`.
  pivot <- wealth %>%
    ungroup() %>%
    filter(FUA == City) %>%
    group_by(VRLVIERKANT100M, Wealth_per_capita_percentile) %>%
    summarise(Population = n(), .groups = "drop") %>%
    tidyr::pivot_wider(names_from = Wealth_per_capita_percentile, values_from = Population, values_fill = 0)

  # Parse the E/N numbers of the cell identifiers into a coordinate matrix, in
  # the same row order as `pivot`
  coords <- cbind(
    X = as.numeric(sub("E", "", str_extract(pivot$VRLVIERKANT100M, "E\\d+"))),
    Y = as.numeric(sub("N", "", str_extract(pivot$VRLVIERKANT100M, "N\\d+")))
  )

  # Create SpatialPoints object (EPSG:28992)
  Spatial_grid_cells_sp <- SpatialPoints(coords, proj4string = CRS("+init=epsg:28992"))

  # Keep only the percentile columns, ordered by percentile number
  counts <- pivot[, colnames(pivot) != "VRLVIERKANT100M"]
  counts <- as.matrix(counts[, order(as.numeric(colnames(counts)))])

  # One two-group split per column position 1..99; fail early, naming the
  # city, when there are not enough percentile columns to build them
  indices <- seq(1, 99)
  if (ncol(counts) <= max(indices)) {
    stop(
      "FUA '", City, "': only ", ncol(counts), " percentile columns found, ",
      "but ", max(indices) + 1, " are needed to build ", length(indices),
      " splits.",
      call. = FALSE
    )
  }

  # Households per grid cell over all percentile columns
  cell_totals <- rowSums(counts, na.rm = TRUE)

  # For split i: households in the first i percentile columns versus all the
  # remaining ones, per grid cell, passed to spseg()
  SITI_results <- lapply(indices, function(i) {
    up_to_i <- rowSums(counts[, seq_len(i), drop = FALSE], na.rm = TRUE)
    split_counts <- data.frame(
      sumUpToCurrentPerc = up_to_i,
      sumAboveCurrentPerc = cell_totals - up_to_i
    )
    spseg(x = Spatial_grid_cells_sp, data = split_counts, method = "information", smoothing = "kernel", sigma = 500, useC = FALSE)
  })
  names(SITI_results) <- paste0("SITI_result", indices)

  # Extract the `h` slot of every result and label it with its split number
  combined_data_list <- Map(
    function(result, i) data.frame(Value = result@h, Group = as.character(i)),
    SITI_results,
    indices
  )

  # Make them be in a single database
  combined_data <- do.call(rbind, combined_data_list)

  # Convert the Group column to a factor ordered by split number
  combined_data$Group <- factor(combined_data$Group, levels = as.character(indices))

  # Save the data
  write.csv(combined_data, file = paste0("Wealth_per_capita_excluding_real_estate_segregation_500m_2011_", City, ".csv"), row.names = FALSE)
}


# Release the previous year's data before loading the next file
rm(wealth)



### Calculate Spatial Information Theory Index for wealth segregation in 2010 ###

# Load the 2010 data (always needed here: `wealth` was removed just above)
wealth <- read.csv("raw_data/database_2010.csv")

# Delete missing values of wealth (total assets equal to 99999999999).
# filter() also drops rows where VEHW1000VERH is NA; indexing with `[` would
# instead turn each NA comparison into a phantom row filled with NA.
wealth <- wealth %>%
  filter(VEHW1000VERH != 99999999999)

# Calculate movable wealth (financial assets + entrepreneurial capital +
# significant interests + other assets) and its per-capita value.
# NOTE(review): the percentile ranking below is computed on Movable_wealth;
# Movable_wealth_per_capita is not used by it. Confirm which one is intended,
# since the output files are labelled "per capita".
wealth <- wealth %>%
  mutate(
    Movable_wealth = VEHW1110FINH + VEHW1130ONDH + VEHW1140ABEH + VEHW1150OVEH,
    Movable_wealth_per_capita = Movable_wealth / BVRAHL
  )

# Define all possible FUAs
cities <- c("'s-Gravenhage", "'s-Hertogenbosch", "Alkmaar","Almelo", "Alphen aan den Rijn", "Amersfoort", "Amsterdam", "Apeldoorn", "Arnhem", "Assen", "Bergen op Zoom", "Breda", "Deventer", "Ede", "Eindhoven", "Enschede", "Gouda", "Groningen", "Heerlen", "Leeuwarden", "Leiden", "Lelystad", "Maastricht", "Middelburg", "Nijmegen", "Oss", "Roosendaal", "Rotterdam", "Sittard-Geleen", "Soest", "Tilburg", "Utrecht", "Veenendaal", "Venlo", "Zwolle")

# Rank households within their FUA: order by movable wealth, assign each
# household to one of 100 rank-based groups per FUA, then drop the grouping so
# that later steps receive a plain (ungrouped) table.
wealth <- wealth %>%
  arrange(FUA, Movable_wealth) %>%
  group_by(FUA) %>%
  mutate(Wealth_per_capita_percentile = ntile(Movable_wealth, 100)) %>%
  ungroup()

# Clean rows of missing data (grid cell recorded as "----------"), then append
# "00" to the E and N numbers of every cell identifier, i.e. multiply both
# coordinates by 100.
# NOTE(review): this presumably converts units of 100 m into metres - confirm.
wealth <- wealth %>%
  filter(VRLVIERKANT100M != "----------") %>%
  mutate(VRLVIERKANT100M = gsub("(E)(\\d+)(N)(\\d+)","\\1\\200\\3\\400", VRLVIERKANT100M))

# Loop over each city (sigma = 500m)
for (city in cities) {
  # The output file name at the end of the loop is built from `City`
  City <- city

  # Households per grid cell and percentile for this FUA, spread so that each
  # row is a grid cell and each column a percentile (0 where a cell has no
  # household in that percentile). ungroup() makes the step independent of any
  # grouping still attached to `wealth`.
  pivot <- wealth %>%
    ungroup() %>%
    filter(FUA == City) %>%
    group_by(VRLVIERKANT100M, Wealth_per_capita_percentile) %>%
    summarise(Population = n(), .groups = "drop") %>%
    tidyr::pivot_wider(names_from = Wealth_per_capita_percentile, values_from = Population, values_fill = 0)

  # Parse the E/N numbers of the cell identifiers into a coordinate matrix, in
  # the same row order as `pivot`
  coords <- cbind(
    X = as.numeric(sub("E", "", str_extract(pivot$VRLVIERKANT100M, "E\\d+"))),
    Y = as.numeric(sub("N", "", str_extract(pivot$VRLVIERKANT100M, "N\\d+")))
  )

  # Create SpatialPoints object (EPSG:28992)
  Spatial_grid_cells_sp <- SpatialPoints(coords, proj4string = CRS("+init=epsg:28992"))

  # Keep only the percentile columns, ordered by percentile number
  counts <- pivot[, colnames(pivot) != "VRLVIERKANT100M"]
  counts <- as.matrix(counts[, order(as.numeric(colnames(counts)))])

  # One two-group split per column position 1..99; fail early, naming the
  # city, when there are not enough percentile columns to build them
  indices <- seq(1, 99)
  if (ncol(counts) <= max(indices)) {
    stop(
      "FUA '", City, "': only ", ncol(counts), " percentile columns found, ",
      "but ", max(indices) + 1, " are needed to build ", length(indices),
      " splits.",
      call. = FALSE
    )
  }

  # Households per grid cell over all percentile columns
  cell_totals <- rowSums(counts, na.rm = TRUE)

  # For split i: households in the first i percentile columns versus all the
  # remaining ones, per grid cell, passed to spseg()
  SITI_results <- lapply(indices, function(i) {
    up_to_i <- rowSums(counts[, seq_len(i), drop = FALSE], na.rm = TRUE)
    split_counts <- data.frame(
      sumUpToCurrentPerc = up_to_i,
      sumAboveCurrentPerc = cell_totals - up_to_i
    )
    spseg(x = Spatial_grid_cells_sp, data = split_counts, method = "information", smoothing = "kernel", sigma = 500, useC = FALSE)
  })
  names(SITI_results) <- paste0("SITI_result", indices)

  # Extract the `h` slot of every result and label it with its split number
  combined_data_list <- Map(
    function(result, i) data.frame(Value = result@h, Group = as.character(i)),
    SITI_results,
    indices
  )

  # Make them be in a single database
  combined_data <- do.call(rbind, combined_data_list)

  # Convert the Group column to a factor ordered by split number
  combined_data$Group <- factor(combined_data$Group, levels = as.character(indices))

  # Save the data
  write.csv(combined_data, file = paste0("Wealth_per_capita_excluding_real_estate_segregation_500m_2010_", City, ".csv"), row.names = FALSE)
}


# Release the previous year's data before loading the next file
rm(wealth)




### Calculate Spatial Information Theory Index for wealth segregation in 2009 ###

# Load the 2009 data (always needed here: `wealth` was removed just above)
wealth <- read.csv("raw_data/database_2009.csv")

# Delete missing values of wealth (total assets equal to 99999999999).
# filter() also drops rows where VEHW1000VERH is NA; indexing with `[` would
# instead turn each NA comparison into a phantom row filled with NA.
wealth <- wealth %>%
  filter(VEHW1000VERH != 99999999999)

# Calculate movable wealth (financial assets + entrepreneurial capital +
# significant interests + other assets) and its per-capita value.
# NOTE(review): the percentile ranking below is computed on Movable_wealth;
# Movable_wealth_per_capita is not used by it. Confirm which one is intended,
# since the output files are labelled "per capita".
wealth <- wealth %>%
  mutate(
    Movable_wealth = VEHW1110FINH + VEHW1130ONDH + VEHW1140ABEH + VEHW1150OVEH,
    Movable_wealth_per_capita = Movable_wealth / BVRAHL
  )

# Define all possible FUAs
cities <- c("'s-Gravenhage", "'s-Hertogenbosch", "Alkmaar","Almelo", "Alphen aan den Rijn", "Amersfoort", "Amsterdam", "Apeldoorn", "Arnhem", "Assen", "Bergen op Zoom", "Breda", "Deventer", "Ede", "Eindhoven", "Enschede", "Gouda", "Groningen", "Heerlen", "Leeuwarden", "Leiden", "Lelystad", "Maastricht", "Middelburg", "Nijmegen", "Oss", "Roosendaal", "Rotterdam", "Sittard-Geleen", "Soest", "Tilburg", "Utrecht", "Veenendaal", "Venlo", "Zwolle")

# Rank households within their FUA: order by movable wealth, assign each
# household to one of 100 rank-based groups per FUA, then drop the grouping so
# that later steps receive a plain (ungrouped) table.
wealth <- wealth %>%
  arrange(FUA, Movable_wealth) %>%
  group_by(FUA) %>%
  mutate(Wealth_per_capita_percentile = ntile(Movable_wealth, 100)) %>%
  ungroup()

# Clean rows of missing data (grid cell recorded as "----------"), then append
# "00" to the E and N numbers of every cell identifier, i.e. multiply both
# coordinates by 100.
# NOTE(review): this presumably converts units of 100 m into metres - confirm.
wealth <- wealth %>%
  filter(VRLVIERKANT100M != "----------") %>%
  mutate(VRLVIERKANT100M = gsub("(E)(\\d+)(N)(\\d+)","\\1\\200\\3\\400", VRLVIERKANT100M))

# Loop over each city (sigma = 500m)
for (city in cities) {
  # The output file name at the end of the loop is built from `City`
  City <- city

  # Households per grid cell and percentile for this FUA, spread so that each
  # row is a grid cell and each column a percentile (0 where a cell has no
  # household in that percentile). ungroup() makes the step independent of any
  # grouping still attached to `wealth`.
  pivot <- wealth %>%
    ungroup() %>%
    filter(FUA == City) %>%
    group_by(VRLVIERKANT100M, Wealth_per_capita_percentile) %>%
    summarise(Population = n(), .groups = "drop") %>%
    tidyr::pivot_wider(names_from = Wealth_per_capita_percentile, values_from = Population, values_fill = 0)

  # Parse the E/N numbers of the cell identifiers into a coordinate matrix, in
  # the same row order as `pivot`
  coords <- cbind(
    X = as.numeric(sub("E", "", str_extract(pivot$VRLVIERKANT100M, "E\\d+"))),
    Y = as.numeric(sub("N", "", str_extract(pivot$VRLVIERKANT100M, "N\\d+")))
  )

  # Create SpatialPoints object (EPSG:28992)
  Spatial_grid_cells_sp <- SpatialPoints(coords, proj4string = CRS("+init=epsg:28992"))

  # Keep only the percentile columns, ordered by percentile number
  counts <- pivot[, colnames(pivot) != "VRLVIERKANT100M"]
  counts <- as.matrix(counts[, order(as.numeric(colnames(counts)))])

  # One two-group split per column position 1..99; fail early, naming the
  # city, when there are not enough percentile columns to build them
  indices <- seq(1, 99)
  if (ncol(counts) <= max(indices)) {
    stop(
      "FUA '", City, "': only ", ncol(counts), " percentile columns found, ",
      "but ", max(indices) + 1, " are needed to build ", length(indices),
      " splits.",
      call. = FALSE
    )
  }

  # Households per grid cell over all percentile columns
  cell_totals <- rowSums(counts, na.rm = TRUE)

  # For split i: households in the first i percentile columns versus all the
  # remaining ones, per grid cell, passed to spseg()
  SITI_results <- lapply(indices, function(i) {
    up_to_i <- rowSums(counts[, seq_len(i), drop = FALSE], na.rm = TRUE)
    split_counts <- data.frame(
      sumUpToCurrentPerc = up_to_i,
      sumAboveCurrentPerc = cell_totals - up_to_i
    )
    spseg(x = Spatial_grid_cells_sp, data = split_counts, method = "information", smoothing = "kernel", sigma = 500, useC = FALSE)
  })
  names(SITI_results) <- paste0("SITI_result", indices)

  # Extract the `h` slot of every result and label it with its split number
  combined_data_list <- Map(
    function(result, i) data.frame(Value = result@h, Group = as.character(i)),
    SITI_results,
    indices
  )

  # Make them be in a single database
  combined_data <- do.call(rbind, combined_data_list)

  # Convert the Group column to a factor ordered by split number
  combined_data$Group <- factor(combined_data$Group, levels = as.character(indices))

  # Save the data
  write.csv(combined_data, file = paste0("Wealth_per_capita_excluding_real_estate_segregation_500m_2009_", City, ".csv"), row.names = FALSE)
}

# Release the previous year's data before loading the next file
rm(wealth)




### Calculate Spatial Information Theory Index for wealth segregation in 2008 ###

# Load the 2008 data (always needed here: `wealth` was removed just above)
wealth <- read.csv("raw_data/database_2008.csv")

# Delete missing values of wealth (total assets equal to 99999999999).
# filter() also drops rows where VEHW1000VERH is NA; indexing with `[` would
# instead turn each NA comparison into a phantom row filled with NA.
wealth <- wealth %>%
  filter(VEHW1000VERH != 99999999999)

# Calculate movable wealth (financial assets + entrepreneurial capital +
# significant interests + other assets) and its per-capita value.
# NOTE(review): the percentile ranking below is computed on Movable_wealth;
# Movable_wealth_per_capita is not used by it. Confirm which one is intended,
# since the output files are labelled "per capita".
wealth <- wealth %>%
  mutate(
    Movable_wealth = VEHW1110FINH + VEHW1130ONDH + VEHW1140ABEH + VEHW1150OVEH,
    Movable_wealth_per_capita = Movable_wealth / BVRAHL
  )

# Define all possible FUAs
cities <- c("'s-Gravenhage", "'s-Hertogenbosch", "Alkmaar","Almelo", "Alphen aan den Rijn", "Amersfoort", "Amsterdam", "Apeldoorn", "Arnhem", "Assen", "Bergen op Zoom", "Breda", "Deventer", "Ede", "Eindhoven", "Enschede", "Gouda", "Groningen", "Heerlen", "Leeuwarden", "Leiden", "Lelystad", "Maastricht", "Middelburg", "Nijmegen", "Oss", "Roosendaal", "Rotterdam", "Sittard-Geleen", "Soest", "Tilburg", "Utrecht", "Veenendaal", "Venlo", "Zwolle")

# Rank households within their FUA: order by movable wealth, assign each
# household to one of 100 rank-based groups per FUA, then drop the grouping so
# that later steps receive a plain (ungrouped) table.
wealth <- wealth %>%
  arrange(FUA, Movable_wealth) %>%
  group_by(FUA) %>%
  mutate(Wealth_per_capita_percentile = ntile(Movable_wealth, 100)) %>%
  ungroup()

# Clean rows of missing data (grid cell recorded as "----------"), then append
# "00" to the E and N numbers of every cell identifier, i.e. multiply both
# coordinates by 100.
# NOTE(review): this presumably converts units of 100 m into metres - confirm.
wealth <- wealth %>%
  filter(VRLVIERKANT100M != "----------") %>%
  mutate(VRLVIERKANT100M = gsub("(E)(\\d+)(N)(\\d+)","\\1\\200\\3\\400", VRLVIERKANT100M))

# Loop over each city (sigma = 500m)
for (city in cities) {
  # The output file name at the end of the loop is built from `City`
  City <- city

  # Households per grid cell and percentile for this FUA, spread so that each
  # row is a grid cell and each column a percentile (0 where a cell has no
  # household in that percentile). ungroup() makes the step independent of any
  # grouping still attached to `wealth`.
  pivot <- wealth %>%
    ungroup() %>%
    filter(FUA == City) %>%
    group_by(VRLVIERKANT100M, Wealth_per_capita_percentile) %>%
    summarise(Population = n(), .groups = "drop") %>%
    tidyr::pivot_wider(names_from = Wealth_per_capita_percentile, values_from = Population, values_fill = 0)

  # Parse the E/N numbers of the cell identifiers into a coordinate matrix, in
  # the same row order as `pivot`
  coords <- cbind(
    X = as.numeric(sub("E", "", str_extract(pivot$VRLVIERKANT100M, "E\\d+"))),
    Y = as.numeric(sub("N", "", str_extract(pivot$VRLVIERKANT100M, "N\\d+")))
  )

  # Create SpatialPoints object (EPSG:28992)
  Spatial_grid_cells_sp <- SpatialPoints(coords, proj4string = CRS("+init=epsg:28992"))

  # Keep only the percentile columns, ordered by percentile number
  counts <- pivot[, colnames(pivot) != "VRLVIERKANT100M"]
  counts <- as.matrix(counts[, order(as.numeric(colnames(counts)))])

  # One two-group split per column position 1..99; fail early, naming the
  # city, when there are not enough percentile columns to build them
  indices <- seq(1, 99)
  if (ncol(counts) <= max(indices)) {
    stop(
      "FUA '", City, "': only ", ncol(counts), " percentile columns found, ",
      "but ", max(indices) + 1, " are needed to build ", length(indices),
      " splits.",
      call. = FALSE
    )
  }

  # Households per grid cell over all percentile columns
  cell_totals <- rowSums(counts, na.rm = TRUE)

  # For split i: households in the first i percentile columns versus all the
  # remaining ones, per grid cell, passed to spseg()
  SITI_results <- lapply(indices, function(i) {
    up_to_i <- rowSums(counts[, seq_len(i), drop = FALSE], na.rm = TRUE)
    split_counts <- data.frame(
      sumUpToCurrentPerc = up_to_i,
      sumAboveCurrentPerc = cell_totals - up_to_i
    )
    spseg(x = Spatial_grid_cells_sp, data = split_counts, method = "information", smoothing = "kernel", sigma = 500, useC = FALSE)
  })
  names(SITI_results) <- paste0("SITI_result", indices)

  # Extract the `h` slot of every result and label it with its split number
  combined_data_list <- Map(
    function(result, i) data.frame(Value = result@h, Group = as.character(i)),
    SITI_results,
    indices
  )

  # Make them be in a single database
  combined_data <- do.call(rbind, combined_data_list)

  # Convert the Group column to a factor ordered by split number
  combined_data$Group <- factor(combined_data$Group, levels = as.character(indices))

  # Save the data
  write.csv(combined_data, file = paste0("Wealth_per_capita_excluding_real_estate_segregation_500m_2008_", City, ".csv"), row.names = FALSE)
}

# Release the previous year's data before loading the next file
rm(wealth)




### Calculate Spatial Information Theory Index for wealth segregation in 2007 ###

# Load the 2007 data (always needed here: `wealth` was removed just above)
wealth <- read.csv("raw_data/database_2007.csv")

# Delete missing values of wealth (total assets equal to 99999999999).
# filter() also drops rows where VEHW1000VERH is NA; indexing with `[` would
# instead turn each NA comparison into a phantom row filled with NA.
wealth <- wealth %>%
  filter(VEHW1000VERH != 99999999999)

# Calculate movable wealth (financial assets + entrepreneurial capital +
# significant interests + other assets) and its per-capita value.
# NOTE(review): the percentile ranking below is computed on Movable_wealth;
# Movable_wealth_per_capita is not used by it. Confirm which one is intended,
# since the output files are labelled "per capita".
wealth <- wealth %>%
  mutate(
    Movable_wealth = VEHW1110FINH + VEHW1130ONDH + VEHW1140ABEH + VEHW1150OVEH,
    Movable_wealth_per_capita = Movable_wealth / BVRAHL
  )

# Define all possible FUAs
cities <- c("'s-Gravenhage", "'s-Hertogenbosch", "Alkmaar","Almelo", "Alphen aan den Rijn", "Amersfoort", "Amsterdam", "Apeldoorn", "Arnhem", "Assen", "Bergen op Zoom", "Breda", "Deventer", "Ede", "Eindhoven", "Enschede", "Gouda", "Groningen", "Heerlen", "Leeuwarden", "Leiden", "Lelystad", "Maastricht", "Middelburg", "Nijmegen", "Oss", "Roosendaal", "Rotterdam", "Sittard-Geleen", "Soest", "Tilburg", "Utrecht", "Veenendaal", "Venlo", "Zwolle")

# Rank households within their FUA: order by movable wealth, assign each
# household to one of 100 rank-based groups per FUA, then drop the grouping so
# that later steps receive a plain (ungrouped) table.
wealth <- wealth %>%
  arrange(FUA, Movable_wealth) %>%
  group_by(FUA) %>%
  mutate(Wealth_per_capita_percentile = ntile(Movable_wealth, 100)) %>%
  ungroup()

# Clean rows of missing data (grid cell recorded as "----------"), then append
# "00" to the E and N numbers of every cell identifier, i.e. multiply both
# coordinates by 100.
# NOTE(review): this presumably converts units of 100 m into metres - confirm.
wealth <- wealth %>%
  filter(VRLVIERKANT100M != "----------") %>%
  mutate(VRLVIERKANT100M = gsub("(E)(\\d+)(N)(\\d+)","\\1\\200\\3\\400", VRLVIERKANT100M))

# Loop over each city (sigma = 500m)
for (city in cities) {
  # The output file name at the end of the loop is built from `City`
  City <- city

  # Households per grid cell and percentile for this FUA, spread so that each
  # row is a grid cell and each column a percentile (0 where a cell has no
  # household in that percentile). ungroup() makes the step independent of any
  # grouping still attached to `wealth`.
  pivot <- wealth %>%
    ungroup() %>%
    filter(FUA == City) %>%
    group_by(VRLVIERKANT100M, Wealth_per_capita_percentile) %>%
    summarise(Population = n(), .groups = "drop") %>%
    tidyr::pivot_wider(names_from = Wealth_per_capita_percentile, values_from = Population, values_fill = 0)

  # Parse the E/N numbers of the cell identifiers into a coordinate matrix, in
  # the same row order as `pivot`
  coords <- cbind(
    X = as.numeric(sub("E", "", str_extract(pivot$VRLVIERKANT100M, "E\\d+"))),
    Y = as.numeric(sub("N", "", str_extract(pivot$VRLVIERKANT100M, "N\\d+")))
  )

  # Create SpatialPoints object (EPSG:28992)
  Spatial_grid_cells_sp <- SpatialPoints(coords, proj4string = CRS("+init=epsg:28992"))

  # Keep only the percentile columns, ordered by percentile number
  counts <- pivot[, colnames(pivot) != "VRLVIERKANT100M"]
  counts <- as.matrix(counts[, order(as.numeric(colnames(counts)))])

  # One two-group split per column position 1..99; fail early, naming the
  # city, when there are not enough percentile columns to build them
  indices <- seq(1, 99)
  if (ncol(counts) <= max(indices)) {
    stop(
      "FUA '", City, "': only ", ncol(counts), " percentile columns found, ",
      "but ", max(indices) + 1, " are needed to build ", length(indices),
      " splits.",
      call. = FALSE
    )
  }

  # Households per grid cell over all percentile columns
  cell_totals <- rowSums(counts, na.rm = TRUE)

  # For split i: households in the first i percentile columns versus all the
  # remaining ones, per grid cell, passed to spseg()
  SITI_results <- lapply(indices, function(i) {
    up_to_i <- rowSums(counts[, seq_len(i), drop = FALSE], na.rm = TRUE)
    split_counts <- data.frame(
      sumUpToCurrentPerc = up_to_i,
      sumAboveCurrentPerc = cell_totals - up_to_i
    )
    spseg(x = Spatial_grid_cells_sp, data = split_counts, method = "information", smoothing = "kernel", sigma = 500, useC = FALSE)
  })
  names(SITI_results) <- paste0("SITI_result", indices)

  # Extract the `h` slot of every result and label it with its split number
  combined_data_list <- Map(
    function(result, i) data.frame(Value = result@h, Group = as.character(i)),
    SITI_results,
    indices
  )

  # Make them be in a single database
  combined_data <- do.call(rbind, combined_data_list)

  # Convert the Group column to a factor ordered by split number
  combined_data$Group <- factor(combined_data$Group, levels = as.character(indices))

  # Save the data
  write.csv(combined_data, file = paste0("Wealth_per_capita_excluding_real_estate_segregation_500m_2007_", City, ".csv"), row.names = FALSE)
}

# Release the previous year's data before loading the next file
rm(wealth)



### Calculate Spatial Information Theory Index for wealth segregation in 2006 ###

# Load the 2006 data (always needed here: `wealth` was removed just above)
wealth <- read.csv("raw_data/database_2006.csv")

# Delete missing values of wealth (total assets equal to 99999999999).
# filter() also drops rows where VEHW1000VERH is NA; indexing with `[` would
# instead turn each NA comparison into a phantom row filled with NA.
wealth <- wealth %>%
  filter(VEHW1000VERH != 99999999999)

# Calculate movable wealth (financial assets + entrepreneurial capital +
# significant interests + other assets) and its per-capita value.
# NOTE(review): the percentile ranking below is computed on Movable_wealth;
# Movable_wealth_per_capita is not used by it. Confirm which one is intended,
# since the output files are labelled "per capita".
wealth <- wealth %>%
  mutate(
    Movable_wealth = VEHW1110FINH + VEHW1130ONDH + VEHW1140ABEH + VEHW1150OVEH,
    Movable_wealth_per_capita = Movable_wealth / BVRAHL
  )

# Define all possible FUAs
cities <- c("'s-Gravenhage", "'s-Hertogenbosch", "Alkmaar","Almelo", "Alphen aan den Rijn", "Amersfoort", "Amsterdam", "Apeldoorn", "Arnhem", "Assen", "Bergen op Zoom", "Breda", "Deventer", "Ede", "Eindhoven", "Enschede", "Gouda", "Groningen", "Heerlen", "Leeuwarden", "Leiden", "Lelystad", "Maastricht", "Middelburg", "Nijmegen", "Oss", "Roosendaal", "Rotterdam", "Sittard-Geleen", "Soest", "Tilburg", "Utrecht", "Veenendaal", "Venlo", "Zwolle")

# Rank households within their FUA: order by movable wealth, assign each
# household to one of 100 rank-based groups per FUA, then drop the grouping so
# that later steps receive a plain (ungrouped) table.
wealth <- wealth %>%
  arrange(FUA, Movable_wealth) %>%
  group_by(FUA) %>%
  mutate(Wealth_per_capita_percentile = ntile(Movable_wealth, 100)) %>%
  ungroup()

# Clean rows of missing data (grid cell recorded as "----------"), then append
# "00" to the E and N numbers of every cell identifier, i.e. multiply both
# coordinates by 100.
# NOTE(review): this presumably converts units of 100 m into metres - confirm.
wealth <- wealth %>%
  filter(VRLVIERKANT100M != "----------") %>%
  mutate(VRLVIERKANT100M = gsub("(E)(\\d+)(N)(\\d+)","\\1\\200\\3\\400", VRLVIERKANT100M))

# Loop over each city (sigma = 500m)
for (city in cities) {
  # The output file name at the end of the loop is built from `City`
  City <- city

  # Households per grid cell and percentile for this FUA, spread so that each
  # row is a grid cell and each column a percentile (0 where a cell has no
  # household in that percentile). ungroup() makes the step independent of any
  # grouping still attached to `wealth`.
  pivot <- wealth %>%
    ungroup() %>%
    filter(FUA == City) %>%
    group_by(VRLVIERKANT100M, Wealth_per_capita_percentile) %>%
    summarise(Population = n(), .groups = "drop") %>%
    tidyr::pivot_wider(names_from = Wealth_per_capita_percentile, values_from = Population, values_fill = 0)

  # Parse the E/N numbers of the cell identifiers into a coordinate matrix, in
  # the same row order as `pivot`
  coords <- cbind(
    X = as.numeric(sub("E", "", str_extract(pivot$VRLVIERKANT100M, "E\\d+"))),
    Y = as.numeric(sub("N", "", str_extract(pivot$VRLVIERKANT100M, "N\\d+")))
  )

  # Create SpatialPoints object (EPSG:28992)
  Spatial_grid_cells_sp <- SpatialPoints(coords, proj4string = CRS("+init=epsg:28992"))

  # Keep only the percentile columns, ordered by percentile number
  counts <- pivot[, colnames(pivot) != "VRLVIERKANT100M"]
  counts <- as.matrix(counts[, order(as.numeric(colnames(counts)))])

  # One two-group split per column position 1..99; fail early, naming the
  # city, when there are not enough percentile columns to build them
  indices <- seq(1, 99)
  if (ncol(counts) <= max(indices)) {
    stop(
      "FUA '", City, "': only ", ncol(counts), " percentile columns found, ",
      "but ", max(indices) + 1, " are needed to build ", length(indices),
      " splits.",
      call. = FALSE
    )
  }

  # Households per grid cell over all percentile columns
  cell_totals <- rowSums(counts, na.rm = TRUE)

  # For split i: households in the first i percentile columns versus all the
  # remaining ones, per grid cell, passed to spseg()
  SITI_results <- lapply(indices, function(i) {
    up_to_i <- rowSums(counts[, seq_len(i), drop = FALSE], na.rm = TRUE)
    split_counts <- data.frame(
      sumUpToCurrentPerc = up_to_i,
      sumAboveCurrentPerc = cell_totals - up_to_i
    )
    spseg(x = Spatial_grid_cells_sp, data = split_counts, method = "information", smoothing = "kernel", sigma = 500, useC = FALSE)
  })
  names(SITI_results) <- paste0("SITI_result", indices)

  # Extract the `h` slot of every result and label it with its split number
  combined_data_list <- Map(
    function(result, i) data.frame(Value = result@h, Group = as.character(i)),
    SITI_results,
    indices
  )

  # Make them be in a single database
  combined_data <- do.call(rbind, combined_data_list)

  # Convert the Group column to a factor ordered by split number
  combined_data$Group <- factor(combined_data$Group, levels = as.character(indices))

  # Save the data
  write.csv(combined_data, file = paste0("Wealth_per_capita_excluding_real_estate_segregation_500m_2006_", City, ".csv"), row.names = FALSE)
}

# Remove the current data set; the next section reloads `wealth` from file
rm(wealth)





#### Local environment calculated using a 4000m radius ####


### Calculate Spatial Information Theory Index for wealth segregation in 2022 ###
# Preprocessing: load the 2022 records, derive movable wealth, rank every
# household within its FUA and convert the grid-cell identifiers.

# Load the data
wealth <- read.csv("raw_data/database_2022.csv") # only if not already loaded

# Delete rows whose total assets carry the missing-value code.
# filter() also drops rows where the value is NA, whereas logical subsetting
# with `[` would turn those rows into all-NA rows.
wealth <- wealth %>%
  filter(VEHW1000VERH != 99999999999)

# Movable wealth = financial assets + entrepreneurial capital +
# significant interests + other assets; the per-capita value divides it by
# INHAHL (presumably the household size — TODO confirm)
wealth <- wealth %>%
  mutate(
    Movable_wealth = VEHW1110FINH + VEHW1130ONDH + VEHW1140ABEH + VEHW1150OVEH,
    Movable_wealth_per_capita = Movable_wealth / INHAHL
  )

# Define all possible FUAs
cities <- c(
  "'s-Gravenhage", "'s-Hertogenbosch", "Alkmaar", "Almelo",
  "Alphen aan den Rijn", "Amersfoort", "Amsterdam", "Apeldoorn", "Arnhem",
  "Assen", "Bergen op Zoom", "Breda", "Deventer", "Ede", "Eindhoven",
  "Enschede", "Gouda", "Groningen", "Heerlen", "Leeuwarden", "Leiden",
  "Lelystad", "Maastricht", "Middelburg", "Nijmegen", "Oss", "Roosendaal",
  "Rotterdam", "Sittard-Geleen", "Soest", "Tilburg", "Utrecht", "Veenendaal",
  "Venlo", "Zwolle"
)

# Order the households by wealth within every FUA and assign each one the
# wealth percentile (1-100) it belongs to in its FUA.
# NOTE(review): the rank is built from Movable_wealth, not from
# Movable_wealth_per_capita, although the column and the output files are
# labelled "per capita" — confirm which one is intended.
# ungroup() at the end so that the FUA grouping does not leak into the
# per-city processing further down.
wealth <- wealth %>%
  arrange(FUA, Movable_wealth) %>%
  group_by(FUA) %>%
  mutate(Wealth_per_capita_percentile = ntile(Movable_wealth, 100)) %>%
  ungroup()

# Clean rows without a grid cell and rescale the coordinates.
# The replacement reads as \1 \2 "00" \3 \4 "00" (R only knows the
# backreferences \1-\9), i.e. two zeros are appended to the E and to the N
# coordinate. Presumably this converts 100m grid units into meters —
# TODO confirm.
wealth <- wealth %>%
  filter(VRLVIERKANT100M != "----------") %>%
  mutate(VRLVIERKANT100M = gsub("(E)(\\d+)(N)(\\d+)", "\\1\\200\\3\\400", VRLVIERKANT100M))

# Loop over each city (sigma = 4000m)
# For every FUA: count the households per grid cell and wealth percentile,
# split the counts at every percentile cut point into a "below" and an
# "above" group, compute the spatial information theory index of each split
# and write one CSV file per city.
for (city in cities) {
  # The filter and the output file name below refer to `City`
  City <- city

  # Household counts with one row per grid cell and one column per percentile.
  # ungroup() comes first because `wealth` may still be grouped by FUA, which
  # would otherwise carry FUA into the counts.
  pivot <- wealth %>%
    ungroup() %>%
    filter(FUA == City) %>%
    count(VRLVIERKANT100M, Wealth_per_capita_percentile, name = "Population") %>%
    tidyr::pivot_wider(
      names_from = Wealth_per_capita_percentile,
      values_from = Population,
      values_fill = 0
    )

  # At least two percentile columns (besides the grid-cell identifier) are
  # needed to form a below/above split
  if (ncol(pivot) - 1L < 2L) {
    warning("Skipping ", City, ": fewer than two wealth percentiles present", call. = FALSE)
    next
  }

  # Parse the coordinates of every grid cell from its identifier
  grid_cells <- data.frame(
    x = as.numeric(sub("E", "", str_extract(pivot$VRLVIERKANT100M, "E\\d+"))),
    y = as.numeric(sub("N", "", str_extract(pivot$VRLVIERKANT100M, "N\\d+")))
  )

  # Create a spatial object for the grid cells and the SpatialPoints object
  # that is passed to spseg()
  Spatial_grid_cells <- st_as_sf(grid_cells, coords = c("x", "y"), crs = 28992)
  coords <- st_coordinates(Spatial_grid_cells)
  Spatial_grid_cells_sp <- SpatialPoints(coords, proj4string = CRS("+init=epsg:28992"))

  # Keep only the percentile counts, ordered numerically by percentile
  pivot$VRLVIERKANT100M <- NULL
  pivot <- pivot[, order(as.numeric(colnames(pivot)))]

  # One cut point after each percentile column except the last one, capped at
  # 99 (the number of cut points between 100 percentiles). Each cut point is
  # labelled with the percentile it closes, which equals its position when
  # all 100 percentiles are present.
  indices <- seq_len(min(99L, ncol(pivot) - 1L))
  threshold_labels <- colnames(pivot)[indices]

  # For every cut point: households up to and including that percentile
  # versus households above it, per grid cell
  list_of_databases <- lapply(indices, function(i) {
    data.frame(
      sumUpToCurrentPerc = rowSums(pivot[, seq_len(i)], na.rm = TRUE),
      sumAboveCurrentPerc = rowSums(pivot[, (i + 1):ncol(pivot)], na.rm = TRUE)
    )
  })

  # Calculate SITI for every split
  SITI_results <- lapply(list_of_databases, function(split_counts) {
    spseg(
      x = Spatial_grid_cells_sp, data = split_counts, method = "information",
      smoothing = "kernel", sigma = 4000, useC = FALSE
    )
  })
  names(SITI_results) <- paste0("SITI_result", indices)

  # Extract the index value (slot h) of every result and prepare the values
  # for being saved and plotted
  combined_data_list <- lapply(indices, function(i) {
    data.frame(Value = SITI_results[[i]]@h, Group = threshold_labels[i])
  })
  names(combined_data_list) <- names(SITI_results)

  # Make them be in a single database
  combined_data <- do.call(rbind, combined_data_list)

  # Convert the Group column to a factor ordered by cut point
  combined_data$Group <- factor(combined_data$Group, levels = threshold_labels)

  # Save the data
  write.csv(combined_data, file = paste0("Wealth_per_capita_excluding_real_estate_segregation_4000m_2022_", City, ".csv"), row.names = FALSE)
}

# Remove the current data set; the next section reloads `wealth` from file
rm(wealth)




### Calculate Spatial Information Theory Index for wealth segregation in 2021 ###
# Preprocessing: load the 2021 records, derive movable wealth, rank every
# household within its FUA and convert the grid-cell identifiers.

# Load the data
wealth <- read.csv("raw_data/database_2021.csv") # only if not already loaded

# Delete rows whose total assets carry the missing-value code.
# filter() also drops rows where the value is NA, whereas logical subsetting
# with `[` would turn those rows into all-NA rows.
wealth <- wealth %>%
  filter(VEHW1000VERH != 99999999999)

# Movable wealth = financial assets + entrepreneurial capital +
# significant interests + other assets; the per-capita value divides it by
# INHAHL (presumably the household size — TODO confirm)
wealth <- wealth %>%
  mutate(
    Movable_wealth = VEHW1110FINH + VEHW1130ONDH + VEHW1140ABEH + VEHW1150OVEH,
    Movable_wealth_per_capita = Movable_wealth / INHAHL
  )

# Define all possible FUAs
cities <- c(
  "'s-Gravenhage", "'s-Hertogenbosch", "Alkmaar", "Almelo",
  "Alphen aan den Rijn", "Amersfoort", "Amsterdam", "Apeldoorn", "Arnhem",
  "Assen", "Bergen op Zoom", "Breda", "Deventer", "Ede", "Eindhoven",
  "Enschede", "Gouda", "Groningen", "Heerlen", "Leeuwarden", "Leiden",
  "Lelystad", "Maastricht", "Middelburg", "Nijmegen", "Oss", "Roosendaal",
  "Rotterdam", "Sittard-Geleen", "Soest", "Tilburg", "Utrecht", "Veenendaal",
  "Venlo", "Zwolle"
)

# Order the households by wealth within every FUA and assign each one the
# wealth percentile (1-100) it belongs to in its FUA.
# NOTE(review): the rank is built from Movable_wealth, not from
# Movable_wealth_per_capita, although the column and the output files are
# labelled "per capita" — confirm which one is intended.
# ungroup() at the end so that the FUA grouping does not leak into the
# per-city processing further down.
wealth <- wealth %>%
  arrange(FUA, Movable_wealth) %>%
  group_by(FUA) %>%
  mutate(Wealth_per_capita_percentile = ntile(Movable_wealth, 100)) %>%
  ungroup()

# Clean rows without a grid cell and rescale the coordinates.
# The replacement reads as \1 \2 "00" \3 \4 "00" (R only knows the
# backreferences \1-\9), i.e. two zeros are appended to the E and to the N
# coordinate. Presumably this converts 100m grid units into meters —
# TODO confirm.
wealth <- wealth %>%
  filter(VRLVIERKANT100M != "----------") %>%
  mutate(VRLVIERKANT100M = gsub("(E)(\\d+)(N)(\\d+)", "\\1\\200\\3\\400", VRLVIERKANT100M))

# Loop over each city (sigma = 4000m)
# For every FUA: count the households per grid cell and wealth percentile,
# split the counts at every percentile cut point into a "below" and an
# "above" group, compute the spatial information theory index of each split
# and write one CSV file per city.
for (city in cities) {
  # The filter and the output file name below refer to `City`
  City <- city

  # Household counts with one row per grid cell and one column per percentile.
  # ungroup() comes first because `wealth` may still be grouped by FUA, which
  # would otherwise carry FUA into the counts.
  pivot <- wealth %>%
    ungroup() %>%
    filter(FUA == City) %>%
    count(VRLVIERKANT100M, Wealth_per_capita_percentile, name = "Population") %>%
    tidyr::pivot_wider(
      names_from = Wealth_per_capita_percentile,
      values_from = Population,
      values_fill = 0
    )

  # At least two percentile columns (besides the grid-cell identifier) are
  # needed to form a below/above split
  if (ncol(pivot) - 1L < 2L) {
    warning("Skipping ", City, ": fewer than two wealth percentiles present", call. = FALSE)
    next
  }

  # Parse the coordinates of every grid cell from its identifier
  grid_cells <- data.frame(
    x = as.numeric(sub("E", "", str_extract(pivot$VRLVIERKANT100M, "E\\d+"))),
    y = as.numeric(sub("N", "", str_extract(pivot$VRLVIERKANT100M, "N\\d+")))
  )

  # Create a spatial object for the grid cells and the SpatialPoints object
  # that is passed to spseg()
  Spatial_grid_cells <- st_as_sf(grid_cells, coords = c("x", "y"), crs = 28992)
  coords <- st_coordinates(Spatial_grid_cells)
  Spatial_grid_cells_sp <- SpatialPoints(coords, proj4string = CRS("+init=epsg:28992"))

  # Keep only the percentile counts, ordered numerically by percentile
  pivot$VRLVIERKANT100M <- NULL
  pivot <- pivot[, order(as.numeric(colnames(pivot)))]

  # One cut point after each percentile column except the last one, capped at
  # 99 (the number of cut points between 100 percentiles). Each cut point is
  # labelled with the percentile it closes, which equals its position when
  # all 100 percentiles are present.
  indices <- seq_len(min(99L, ncol(pivot) - 1L))
  threshold_labels <- colnames(pivot)[indices]

  # For every cut point: households up to and including that percentile
  # versus households above it, per grid cell
  list_of_databases <- lapply(indices, function(i) {
    data.frame(
      sumUpToCurrentPerc = rowSums(pivot[, seq_len(i)], na.rm = TRUE),
      sumAboveCurrentPerc = rowSums(pivot[, (i + 1):ncol(pivot)], na.rm = TRUE)
    )
  })

  # Calculate SITI for every split
  SITI_results <- lapply(list_of_databases, function(split_counts) {
    spseg(
      x = Spatial_grid_cells_sp, data = split_counts, method = "information",
      smoothing = "kernel", sigma = 4000, useC = FALSE
    )
  })
  names(SITI_results) <- paste0("SITI_result", indices)

  # Extract the index value (slot h) of every result and prepare the values
  # for being saved and plotted
  combined_data_list <- lapply(indices, function(i) {
    data.frame(Value = SITI_results[[i]]@h, Group = threshold_labels[i])
  })
  names(combined_data_list) <- names(SITI_results)

  # Make them be in a single database
  combined_data <- do.call(rbind, combined_data_list)

  # Convert the Group column to a factor ordered by cut point
  combined_data$Group <- factor(combined_data$Group, levels = threshold_labels)

  # Save the data
  write.csv(combined_data, file = paste0("Wealth_per_capita_excluding_real_estate_segregation_4000m_2021_", City, ".csv"), row.names = FALSE)
}

# Remove the current data set; the next section reloads `wealth` from file
rm(wealth)




### Calculate Spatial Information Theory Index for wealth segregation in 2020 ###
# Preprocessing: load the 2020 records, derive movable wealth, rank every
# household within its FUA and convert the grid-cell identifiers.

# Load the data
wealth <- read.csv("raw_data/database_2020.csv") # only if not already loaded

# Delete rows whose total assets carry the missing-value code.
# filter() also drops rows where the value is NA, whereas logical subsetting
# with `[` would turn those rows into all-NA rows.
wealth <- wealth %>%
  filter(VEHW1000VERH != 99999999999)

# Movable wealth = financial assets + entrepreneurial capital +
# significant interests + other assets; the per-capita value divides it by
# INHAHL (presumably the household size — TODO confirm)
wealth <- wealth %>%
  mutate(
    Movable_wealth = VEHW1110FINH + VEHW1130ONDH + VEHW1140ABEH + VEHW1150OVEH,
    Movable_wealth_per_capita = Movable_wealth / INHAHL
  )

# Define all possible FUAs
cities <- c(
  "'s-Gravenhage", "'s-Hertogenbosch", "Alkmaar", "Almelo",
  "Alphen aan den Rijn", "Amersfoort", "Amsterdam", "Apeldoorn", "Arnhem",
  "Assen", "Bergen op Zoom", "Breda", "Deventer", "Ede", "Eindhoven",
  "Enschede", "Gouda", "Groningen", "Heerlen", "Leeuwarden", "Leiden",
  "Lelystad", "Maastricht", "Middelburg", "Nijmegen", "Oss", "Roosendaal",
  "Rotterdam", "Sittard-Geleen", "Soest", "Tilburg", "Utrecht", "Veenendaal",
  "Venlo", "Zwolle"
)

# Order the households by wealth within every FUA and assign each one the
# wealth percentile (1-100) it belongs to in its FUA.
# NOTE(review): the rank is built from Movable_wealth, not from
# Movable_wealth_per_capita, although the column and the output files are
# labelled "per capita" — confirm which one is intended.
# ungroup() at the end so that the FUA grouping does not leak into the
# per-city processing further down.
wealth <- wealth %>%
  arrange(FUA, Movable_wealth) %>%
  group_by(FUA) %>%
  mutate(Wealth_per_capita_percentile = ntile(Movable_wealth, 100)) %>%
  ungroup()

# Clean rows without a grid cell and rescale the coordinates.
# The replacement reads as \1 \2 "00" \3 \4 "00" (R only knows the
# backreferences \1-\9), i.e. two zeros are appended to the E and to the N
# coordinate. Presumably this converts 100m grid units into meters —
# TODO confirm.
wealth <- wealth %>%
  filter(VRLVIERKANT100M != "----------") %>%
  mutate(VRLVIERKANT100M = gsub("(E)(\\d+)(N)(\\d+)", "\\1\\200\\3\\400", VRLVIERKANT100M))

# Loop over each city (sigma = 4000m)
# For every FUA: count the households per grid cell and wealth percentile,
# split the counts at every percentile cut point into a "below" and an
# "above" group, compute the spatial information theory index of each split
# and write one CSV file per city.
for (city in cities) {
  # The filter and the output file name below refer to `City`
  City <- city

  # Household counts with one row per grid cell and one column per percentile.
  # ungroup() comes first because `wealth` may still be grouped by FUA, which
  # would otherwise carry FUA into the counts.
  pivot <- wealth %>%
    ungroup() %>%
    filter(FUA == City) %>%
    count(VRLVIERKANT100M, Wealth_per_capita_percentile, name = "Population") %>%
    tidyr::pivot_wider(
      names_from = Wealth_per_capita_percentile,
      values_from = Population,
      values_fill = 0
    )

  # At least two percentile columns (besides the grid-cell identifier) are
  # needed to form a below/above split
  if (ncol(pivot) - 1L < 2L) {
    warning("Skipping ", City, ": fewer than two wealth percentiles present", call. = FALSE)
    next
  }

  # Parse the coordinates of every grid cell from its identifier
  grid_cells <- data.frame(
    x = as.numeric(sub("E", "", str_extract(pivot$VRLVIERKANT100M, "E\\d+"))),
    y = as.numeric(sub("N", "", str_extract(pivot$VRLVIERKANT100M, "N\\d+")))
  )

  # Create a spatial object for the grid cells and the SpatialPoints object
  # that is passed to spseg()
  Spatial_grid_cells <- st_as_sf(grid_cells, coords = c("x", "y"), crs = 28992)
  coords <- st_coordinates(Spatial_grid_cells)
  Spatial_grid_cells_sp <- SpatialPoints(coords, proj4string = CRS("+init=epsg:28992"))

  # Keep only the percentile counts, ordered numerically by percentile
  pivot$VRLVIERKANT100M <- NULL
  pivot <- pivot[, order(as.numeric(colnames(pivot)))]

  # One cut point after each percentile column except the last one, capped at
  # 99 (the number of cut points between 100 percentiles). Each cut point is
  # labelled with the percentile it closes, which equals its position when
  # all 100 percentiles are present.
  indices <- seq_len(min(99L, ncol(pivot) - 1L))
  threshold_labels <- colnames(pivot)[indices]

  # For every cut point: households up to and including that percentile
  # versus households above it, per grid cell
  list_of_databases <- lapply(indices, function(i) {
    data.frame(
      sumUpToCurrentPerc = rowSums(pivot[, seq_len(i)], na.rm = TRUE),
      sumAboveCurrentPerc = rowSums(pivot[, (i + 1):ncol(pivot)], na.rm = TRUE)
    )
  })

  # Calculate SITI for every split
  SITI_results <- lapply(list_of_databases, function(split_counts) {
    spseg(
      x = Spatial_grid_cells_sp, data = split_counts, method = "information",
      smoothing = "kernel", sigma = 4000, useC = FALSE
    )
  })
  names(SITI_results) <- paste0("SITI_result", indices)

  # Extract the index value (slot h) of every result and prepare the values
  # for being saved and plotted
  combined_data_list <- lapply(indices, function(i) {
    data.frame(Value = SITI_results[[i]]@h, Group = threshold_labels[i])
  })
  names(combined_data_list) <- names(SITI_results)

  # Make them be in a single database
  combined_data <- do.call(rbind, combined_data_list)

  # Convert the Group column to a factor ordered by cut point
  combined_data$Group <- factor(combined_data$Group, levels = threshold_labels)

  # Save the data
  write.csv(combined_data, file = paste0("Wealth_per_capita_excluding_real_estate_segregation_4000m_2020_", City, ".csv"), row.names = FALSE)
}

# Remove the current data set; the next section reloads `wealth` from file
rm(wealth)


### Calculate Spatial Information Theory Index for wealth segregation in 2019 ###
# Preprocessing: load the 2019 records, derive movable wealth, rank every
# household within its FUA and convert the grid-cell identifiers.

# Load the data
wealth <- read.csv("raw_data/database_2019.csv") # only if not already loaded

# Delete rows whose total assets carry the missing-value code.
# filter() also drops rows where the value is NA, whereas logical subsetting
# with `[` would turn those rows into all-NA rows.
wealth <- wealth %>%
  filter(VEHW1000VERH != 99999999999)

# Movable wealth = financial assets + entrepreneurial capital +
# significant interests + other assets; the per-capita value divides it by
# INHAHL (presumably the household size — TODO confirm)
wealth <- wealth %>%
  mutate(
    Movable_wealth = VEHW1110FINH + VEHW1130ONDH + VEHW1140ABEH + VEHW1150OVEH,
    Movable_wealth_per_capita = Movable_wealth / INHAHL
  )

# Define all possible FUAs
cities <- c(
  "'s-Gravenhage", "'s-Hertogenbosch", "Alkmaar", "Almelo",
  "Alphen aan den Rijn", "Amersfoort", "Amsterdam", "Apeldoorn", "Arnhem",
  "Assen", "Bergen op Zoom", "Breda", "Deventer", "Ede", "Eindhoven",
  "Enschede", "Gouda", "Groningen", "Heerlen", "Leeuwarden", "Leiden",
  "Lelystad", "Maastricht", "Middelburg", "Nijmegen", "Oss", "Roosendaal",
  "Rotterdam", "Sittard-Geleen", "Soest", "Tilburg", "Utrecht", "Veenendaal",
  "Venlo", "Zwolle"
)

# Order the households by wealth within every FUA and assign each one the
# wealth percentile (1-100) it belongs to in its FUA.
# NOTE(review): the rank is built from Movable_wealth, not from
# Movable_wealth_per_capita, although the column and the output files are
# labelled "per capita" — confirm which one is intended.
# ungroup() at the end so that the FUA grouping does not leak into the
# per-city processing further down.
wealth <- wealth %>%
  arrange(FUA, Movable_wealth) %>%
  group_by(FUA) %>%
  mutate(Wealth_per_capita_percentile = ntile(Movable_wealth, 100)) %>%
  ungroup()

# Clean rows without a grid cell and rescale the coordinates.
# The replacement reads as \1 \2 "00" \3 \4 "00" (R only knows the
# backreferences \1-\9), i.e. two zeros are appended to the E and to the N
# coordinate. Presumably this converts 100m grid units into meters —
# TODO confirm.
wealth <- wealth %>%
  filter(VRLVIERKANT100M != "----------") %>%
  mutate(VRLVIERKANT100M = gsub("(E)(\\d+)(N)(\\d+)", "\\1\\200\\3\\400", VRLVIERKANT100M))

# Loop over each city (sigma = 4000m)
# For every FUA: count the households per grid cell and wealth percentile,
# split the counts at every percentile cut point into a "below" and an
# "above" group, compute the spatial information theory index of each split
# and write one CSV file per city.
for (city in cities) {
  # The filter and the output file name below refer to `City`
  City <- city

  # Household counts with one row per grid cell and one column per percentile.
  # ungroup() comes first because `wealth` may still be grouped by FUA, which
  # would otherwise carry FUA into the counts.
  pivot <- wealth %>%
    ungroup() %>%
    filter(FUA == City) %>%
    count(VRLVIERKANT100M, Wealth_per_capita_percentile, name = "Population") %>%
    tidyr::pivot_wider(
      names_from = Wealth_per_capita_percentile,
      values_from = Population,
      values_fill = 0
    )

  # At least two percentile columns (besides the grid-cell identifier) are
  # needed to form a below/above split
  if (ncol(pivot) - 1L < 2L) {
    warning("Skipping ", City, ": fewer than two wealth percentiles present", call. = FALSE)
    next
  }

  # Parse the coordinates of every grid cell from its identifier
  grid_cells <- data.frame(
    x = as.numeric(sub("E", "", str_extract(pivot$VRLVIERKANT100M, "E\\d+"))),
    y = as.numeric(sub("N", "", str_extract(pivot$VRLVIERKANT100M, "N\\d+")))
  )

  # Create a spatial object for the grid cells and the SpatialPoints object
  # that is passed to spseg()
  Spatial_grid_cells <- st_as_sf(grid_cells, coords = c("x", "y"), crs = 28992)
  coords <- st_coordinates(Spatial_grid_cells)
  Spatial_grid_cells_sp <- SpatialPoints(coords, proj4string = CRS("+init=epsg:28992"))

  # Keep only the percentile counts, ordered numerically by percentile
  pivot$VRLVIERKANT100M <- NULL
  pivot <- pivot[, order(as.numeric(colnames(pivot)))]

  # One cut point after each percentile column except the last one, capped at
  # 99 (the number of cut points between 100 percentiles). Each cut point is
  # labelled with the percentile it closes, which equals its position when
  # all 100 percentiles are present.
  indices <- seq_len(min(99L, ncol(pivot) - 1L))
  threshold_labels <- colnames(pivot)[indices]

  # For every cut point: households up to and including that percentile
  # versus households above it, per grid cell
  list_of_databases <- lapply(indices, function(i) {
    data.frame(
      sumUpToCurrentPerc = rowSums(pivot[, seq_len(i)], na.rm = TRUE),
      sumAboveCurrentPerc = rowSums(pivot[, (i + 1):ncol(pivot)], na.rm = TRUE)
    )
  })

  # Calculate SITI for every split
  SITI_results <- lapply(list_of_databases, function(split_counts) {
    spseg(
      x = Spatial_grid_cells_sp, data = split_counts, method = "information",
      smoothing = "kernel", sigma = 4000, useC = FALSE
    )
  })
  names(SITI_results) <- paste0("SITI_result", indices)

  # Extract the index value (slot h) of every result and prepare the values
  # for being saved and plotted
  combined_data_list <- lapply(indices, function(i) {
    data.frame(Value = SITI_results[[i]]@h, Group = threshold_labels[i])
  })
  names(combined_data_list) <- names(SITI_results)

  # Make them be in a single database
  combined_data <- do.call(rbind, combined_data_list)

  # Convert the Group column to a factor ordered by cut point
  combined_data$Group <- factor(combined_data$Group, levels = threshold_labels)

  # Save the data
  write.csv(combined_data, file = paste0("Wealth_per_capita_excluding_real_estate_segregation_4000m_2019_", City, ".csv"), row.names = FALSE)
}

# Remove the current data set; the next section reloads `wealth` from file
rm(wealth)





### Calculate Spatial Information Theory Index for wealth segregation in 2018 ###
# Preprocessing: load the 2018 records, derive movable wealth, rank every
# household within its FUA and convert the grid-cell identifiers.

# Load the data
wealth <- read.csv("raw_data/database_2018.csv") # only if not already loaded

# Delete rows whose total assets carry the missing-value code.
# filter() also drops rows where the value is NA, whereas logical subsetting
# with `[` would turn those rows into all-NA rows.
wealth <- wealth %>%
  filter(VEHW1000VERH != 99999999999)

# Movable wealth = financial assets + entrepreneurial capital +
# significant interests + other assets; the per-capita value divides it by
# INHAHL (presumably the household size — TODO confirm)
wealth <- wealth %>%
  mutate(
    Movable_wealth = VEHW1110FINH + VEHW1130ONDH + VEHW1140ABEH + VEHW1150OVEH,
    Movable_wealth_per_capita = Movable_wealth / INHAHL
  )

# Define all possible FUAs
cities <- c(
  "'s-Gravenhage", "'s-Hertogenbosch", "Alkmaar", "Almelo",
  "Alphen aan den Rijn", "Amersfoort", "Amsterdam", "Apeldoorn", "Arnhem",
  "Assen", "Bergen op Zoom", "Breda", "Deventer", "Ede", "Eindhoven",
  "Enschede", "Gouda", "Groningen", "Heerlen", "Leeuwarden", "Leiden",
  "Lelystad", "Maastricht", "Middelburg", "Nijmegen", "Oss", "Roosendaal",
  "Rotterdam", "Sittard-Geleen", "Soest", "Tilburg", "Utrecht", "Veenendaal",
  "Venlo", "Zwolle"
)

# Order the households by wealth within every FUA and assign each one the
# wealth percentile (1-100) it belongs to in its FUA.
# NOTE(review): the rank is built from Movable_wealth, not from
# Movable_wealth_per_capita, although the column and the output files are
# labelled "per capita" — confirm which one is intended.
# ungroup() at the end so that the FUA grouping does not leak into the
# per-city processing further down.
wealth <- wealth %>%
  arrange(FUA, Movable_wealth) %>%
  group_by(FUA) %>%
  mutate(Wealth_per_capita_percentile = ntile(Movable_wealth, 100)) %>%
  ungroup()

# Clean rows without a grid cell and rescale the coordinates.
# The replacement reads as \1 \2 "00" \3 \4 "00" (R only knows the
# backreferences \1-\9), i.e. two zeros are appended to the E and to the N
# coordinate. Presumably this converts 100m grid units into meters —
# TODO confirm.
wealth <- wealth %>%
  filter(VRLVIERKANT100M != "----------") %>%
  mutate(VRLVIERKANT100M = gsub("(E)(\\d+)(N)(\\d+)", "\\1\\200\\3\\400", VRLVIERKANT100M))

# Loop over each city (sigma = 4000m)
# For every FUA: count the households per grid cell and wealth percentile,
# split the counts at every percentile cut point into a "below" and an
# "above" group, compute the spatial information theory index of each split
# and write one CSV file per city.
for (city in cities) {
  # The filter and the output file name below refer to `City`
  City <- city

  # Household counts with one row per grid cell and one column per percentile.
  # ungroup() comes first because `wealth` may still be grouped by FUA, which
  # would otherwise carry FUA into the counts.
  pivot <- wealth %>%
    ungroup() %>%
    filter(FUA == City) %>%
    count(VRLVIERKANT100M, Wealth_per_capita_percentile, name = "Population") %>%
    tidyr::pivot_wider(
      names_from = Wealth_per_capita_percentile,
      values_from = Population,
      values_fill = 0
    )

  # At least two percentile columns (besides the grid-cell identifier) are
  # needed to form a below/above split
  if (ncol(pivot) - 1L < 2L) {
    warning("Skipping ", City, ": fewer than two wealth percentiles present", call. = FALSE)
    next
  }

  # Parse the coordinates of every grid cell from its identifier
  grid_cells <- data.frame(
    x = as.numeric(sub("E", "", str_extract(pivot$VRLVIERKANT100M, "E\\d+"))),
    y = as.numeric(sub("N", "", str_extract(pivot$VRLVIERKANT100M, "N\\d+")))
  )

  # Create a spatial object for the grid cells and the SpatialPoints object
  # that is passed to spseg()
  Spatial_grid_cells <- st_as_sf(grid_cells, coords = c("x", "y"), crs = 28992)
  coords <- st_coordinates(Spatial_grid_cells)
  Spatial_grid_cells_sp <- SpatialPoints(coords, proj4string = CRS("+init=epsg:28992"))

  # Keep only the percentile counts, ordered numerically by percentile
  pivot$VRLVIERKANT100M <- NULL
  pivot <- pivot[, order(as.numeric(colnames(pivot)))]

  # One cut point after each percentile column except the last one, capped at
  # 99 (the number of cut points between 100 percentiles). Each cut point is
  # labelled with the percentile it closes, which equals its position when
  # all 100 percentiles are present.
  indices <- seq_len(min(99L, ncol(pivot) - 1L))
  threshold_labels <- colnames(pivot)[indices]

  # For every cut point: households up to and including that percentile
  # versus households above it, per grid cell
  list_of_databases <- lapply(indices, function(i) {
    data.frame(
      sumUpToCurrentPerc = rowSums(pivot[, seq_len(i)], na.rm = TRUE),
      sumAboveCurrentPerc = rowSums(pivot[, (i + 1):ncol(pivot)], na.rm = TRUE)
    )
  })

  # Calculate SITI for every split
  SITI_results <- lapply(list_of_databases, function(split_counts) {
    spseg(
      x = Spatial_grid_cells_sp, data = split_counts, method = "information",
      smoothing = "kernel", sigma = 4000, useC = FALSE
    )
  })
  names(SITI_results) <- paste0("SITI_result", indices)

  # Extract the index value (slot h) of every result and prepare the values
  # for being saved and plotted
  combined_data_list <- lapply(indices, function(i) {
    data.frame(Value = SITI_results[[i]]@h, Group = threshold_labels[i])
  })
  names(combined_data_list) <- names(SITI_results)

  # Make them be in a single database
  combined_data <- do.call(rbind, combined_data_list)

  # Convert the Group column to a factor ordered by cut point
  combined_data$Group <- factor(combined_data$Group, levels = threshold_labels)

  # Save the data
  write.csv(combined_data, file = paste0("Wealth_per_capita_excluding_real_estate_segregation_4000m_2018_", City, ".csv"), row.names = FALSE)
}

# Remove the current data set; the next section reloads `wealth` from file
rm(wealth)




### Calculate Spatial Information Theory Index for wealth segregation in 2017 ###
# Preprocessing: load the 2017 records, derive movable wealth, rank every
# household within its FUA and convert the grid-cell identifiers.

# Load the data
wealth <- read.csv("raw_data/database_2017.csv") # only if not already loaded

# Delete rows whose total assets carry the missing-value code.
# filter() also drops rows where the value is NA, whereas logical subsetting
# with `[` would turn those rows into all-NA rows.
wealth <- wealth %>%
  filter(VEHW1000VERH != 99999999999)

# Movable wealth = financial assets + entrepreneurial capital +
# significant interests + other assets; the per-capita value divides it by
# INHAHL (presumably the household size — TODO confirm)
wealth <- wealth %>%
  mutate(
    Movable_wealth = VEHW1110FINH + VEHW1130ONDH + VEHW1140ABEH + VEHW1150OVEH,
    Movable_wealth_per_capita = Movable_wealth / INHAHL
  )

# Define all possible FUAs
cities <- c(
  "'s-Gravenhage", "'s-Hertogenbosch", "Alkmaar", "Almelo",
  "Alphen aan den Rijn", "Amersfoort", "Amsterdam", "Apeldoorn", "Arnhem",
  "Assen", "Bergen op Zoom", "Breda", "Deventer", "Ede", "Eindhoven",
  "Enschede", "Gouda", "Groningen", "Heerlen", "Leeuwarden", "Leiden",
  "Lelystad", "Maastricht", "Middelburg", "Nijmegen", "Oss", "Roosendaal",
  "Rotterdam", "Sittard-Geleen", "Soest", "Tilburg", "Utrecht", "Veenendaal",
  "Venlo", "Zwolle"
)

# Order the households by wealth within every FUA and assign each one the
# wealth percentile (1-100) it belongs to in its FUA.
# NOTE(review): the rank is built from Movable_wealth, not from
# Movable_wealth_per_capita, although the column and the output files are
# labelled "per capita" — confirm which one is intended.
# ungroup() at the end so that the FUA grouping does not leak into the
# per-city processing further down.
wealth <- wealth %>%
  arrange(FUA, Movable_wealth) %>%
  group_by(FUA) %>%
  mutate(Wealth_per_capita_percentile = ntile(Movable_wealth, 100)) %>%
  ungroup()

# Clean rows without a grid cell and rescale the coordinates.
# The replacement reads as \1 \2 "00" \3 \4 "00" (R only knows the
# backreferences \1-\9), i.e. two zeros are appended to the E and to the N
# coordinate. Presumably this converts 100m grid units into meters —
# TODO confirm.
wealth <- wealth %>%
  filter(VRLVIERKANT100M != "----------") %>%
  mutate(VRLVIERKANT100M = gsub("(E)(\\d+)(N)(\\d+)", "\\1\\200\\3\\400", VRLVIERKANT100M))

# Loop over each city (sigma = 4000m)
# For every FUA: count households per grid cell and wealth percentile, split
# the population at each percentile threshold (1-99) into "at or below" versus
# "above", compute the spatial information theory index (spseg, method =
# "information") for every split and write the 99 values to one CSV per city.
for (city in cities) {
  # Set the variable city for the given city
  City <- city

  # Households of this FUA; ungroup() first so a leftover FUA grouping on
  # `wealth` does not drag the FUA column back in through select()
  df <- wealth %>%
    ungroup() %>%
    filter(FUA == City) %>%
    select(VRLVIERKANT100M, Wealth_per_capita_percentile)

  # A city without any located household cannot be processed; skip it
  # instead of aborting the whole batch
  if (nrow(df) == 0) {
    warning("No households with a grid cell for FUA '", City, "'; skipping.",
            call. = FALSE)
    next
  }

  # Pivot table: one row per grid cell, one column per percentile,
  # cells hold the number of households
  pivot <- df %>%
    count(VRLVIERKANT100M, Wealth_per_capita_percentile, name = "Population") %>%
    tidyr::pivot_wider(names_from = Wealth_per_capita_percentile, values_from = Population, values_fill = 0)

  # Parse the coordinates of the grid cells out of the grid code
  grid_cells <- data.frame(
    x = as.numeric(sub("E", "", str_extract(pivot$VRLVIERKANT100M, "E\\d+"))),
    y = as.numeric(sub("N", "", str_extract(pivot$VRLVIERKANT100M, "N\\d+")))
  )

  # Create a spatial object for grid cells
  Spatial_grid_cells <- st_as_sf(grid_cells, coords = c("x", "y"), crs = 28992)
  coords <- st_coordinates(Spatial_grid_cells)

  # Create SpatialPoints object
  Spatial_grid_cells_sp <- SpatialPoints(coords, proj4string = CRS("+init=epsg:28992"))

  # Delete the superfluous grid-code column from the wealth percentile counts
  pivot$VRLVIERKANT100M <- NULL

  # Percentiles that do not occur in this FUA get an all-zero column, so that
  # column i always corresponds to percentile i and thresholds 1-99 exist
  for (missing_percentile in setdiff(as.character(1:100), colnames(pivot))) {
    pivot[[missing_percentile]] <- 0L
  }

  # Order the columns numerically by percentile
  col_names <- as.numeric(colnames(pivot))
  pivot <- pivot[, order(col_names)]

  # For every threshold build the two-group table: households at or below
  # the percentile versus households above it
  indices <- seq(1, 99)
  n_columns <- ncol(pivot)
  list_of_databases <- lapply(indices, function(i) {
    data.frame(
      sumUpToCurrentPerc = rowSums(pivot[, 1:i], na.rm = TRUE),
      sumAboveCurrentPerc = rowSums(pivot[, (i + 1):n_columns], na.rm = TRUE)
    )
  })

  # Calculate SITI for every threshold
  SITI_results <- lapply(list_of_databases, function(group_counts) {
    spseg(x = Spatial_grid_cells_sp, data = group_counts, method = "information", smoothing = "kernel", sigma = 4000, useC = FALSE)
  })
  names(SITI_results) <- paste0("SITI_result", indices)

  # Keep only the index value (slot h) of every result, labelled by threshold
  combined_data_list <- lapply(indices, function(i) {
    data.frame(Value = SITI_results[[i]]@h, Group = as.character(i))
  })

  # Make them be in a single database
  combined_data <- do.call(rbind, combined_data_list)

  # Convert the Group column to a factor with custom levels
  combined_data$Group <- factor(combined_data$Group, levels = as.character(indices))

  # Save the data
  write.csv(combined_data, file = paste0("Wealth_per_capita_excluding_real_estate_segregation_4000m_2017_", City, ".csv"), row.names = FALSE)
}

# Delete data for enabling further operations
rm(wealth)




### Calculate Spatial Information Theory Index for wealth segregation in 2016 ###

# Load the data
wealth <- read.csv("raw_data/database_2016.csv") # only if not already loaded

# Delete missing values of wealth (99999999999 is the missing-value code)
wealth <- wealth[wealth$VEHW1000VERH != 99999999999, ]

# Calculate movable wealth
wealth$Movable_wealth <- wealth$VEHW1110FINH + wealth$VEHW1130ONDH +
  wealth$VEHW1140ABEH + wealth$VEHW1150OVEH

# Calculate movable wealth per capita
wealth$Movable_wealth_per_capita <- wealth$Movable_wealth / wealth$INHAHL

# Define all possible FUAs
cities <- c(
  "'s-Gravenhage", "'s-Hertogenbosch", "Alkmaar", "Almelo",
  "Alphen aan den Rijn", "Amersfoort", "Amsterdam", "Apeldoorn", "Arnhem",
  "Assen", "Bergen op Zoom", "Breda", "Deventer", "Ede", "Eindhoven",
  "Enschede", "Gouda", "Groningen", "Heerlen", "Leeuwarden", "Leiden",
  "Lelystad", "Maastricht", "Middelburg", "Nijmegen", "Oss", "Roosendaal",
  "Rotterdam", "Sittard-Geleen", "Soest", "Tilburg", "Utrecht", "Veenendaal",
  "Venlo", "Zwolle"
)

# Rank every household within its FUA, then clean the grid-cell codes:
#  1. order the rows by FUA and movable wealth;
#  2. assign each household to one of 100 equally sized wealth bins per FUA;
#  3. ungroup, so the FUA grouping does not leak into later select() calls;
#  4. drop rows whose grid cell is the "----------" placeholder;
#  5. append "00" to the easting and northing digits of the grid code
#     (i.e. multiply both by 100) -- presumably converting 100 m grid units
#     to metres, as the column name suggests; confirm against the codebook.
# NOTE(review): the percentile is ranked on household-level Movable_wealth,
# while the column name and the output file names say "per capita" and
# Movable_wealth_per_capita is otherwise unused -- confirm which variable
# is intended before changing it.
wealth <- wealth %>%
  arrange(FUA, Movable_wealth) %>%
  group_by(FUA) %>%
  mutate(Wealth_per_capita_percentile = ntile(Movable_wealth, 100)) %>%
  ungroup() %>%
  filter(VRLVIERKANT100M != "----------") %>%
  mutate(VRLVIERKANT100M = gsub("(E)(\\d+)(N)(\\d+)","\\1\\200\\3\\400", VRLVIERKANT100M))

# Loop over each city (sigma = 4000m)
# For every FUA: count households per grid cell and wealth percentile, split
# the population at each percentile threshold (1-99) into "at or below" versus
# "above", compute the spatial information theory index (spseg, method =
# "information") for every split and write the 99 values to one CSV per city.
for (city in cities) {
  # Set the variable city for the given city
  City <- city

  # Households of this FUA; ungroup() first so a leftover FUA grouping on
  # `wealth` does not drag the FUA column back in through select()
  df <- wealth %>%
    ungroup() %>%
    filter(FUA == City) %>%
    select(VRLVIERKANT100M, Wealth_per_capita_percentile)

  # A city without any located household cannot be processed; skip it
  # instead of aborting the whole batch
  if (nrow(df) == 0) {
    warning("No households with a grid cell for FUA '", City, "'; skipping.",
            call. = FALSE)
    next
  }

  # Pivot table: one row per grid cell, one column per percentile,
  # cells hold the number of households
  pivot <- df %>%
    count(VRLVIERKANT100M, Wealth_per_capita_percentile, name = "Population") %>%
    tidyr::pivot_wider(names_from = Wealth_per_capita_percentile, values_from = Population, values_fill = 0)

  # Parse the coordinates of the grid cells out of the grid code
  grid_cells <- data.frame(
    x = as.numeric(sub("E", "", str_extract(pivot$VRLVIERKANT100M, "E\\d+"))),
    y = as.numeric(sub("N", "", str_extract(pivot$VRLVIERKANT100M, "N\\d+")))
  )

  # Create a spatial object for grid cells
  Spatial_grid_cells <- st_as_sf(grid_cells, coords = c("x", "y"), crs = 28992)
  coords <- st_coordinates(Spatial_grid_cells)

  # Create SpatialPoints object
  Spatial_grid_cells_sp <- SpatialPoints(coords, proj4string = CRS("+init=epsg:28992"))

  # Delete the superfluous grid-code column from the wealth percentile counts
  pivot$VRLVIERKANT100M <- NULL

  # Percentiles that do not occur in this FUA get an all-zero column, so that
  # column i always corresponds to percentile i and thresholds 1-99 exist
  for (missing_percentile in setdiff(as.character(1:100), colnames(pivot))) {
    pivot[[missing_percentile]] <- 0L
  }

  # Order the columns numerically by percentile
  col_names <- as.numeric(colnames(pivot))
  pivot <- pivot[, order(col_names)]

  # For every threshold build the two-group table: households at or below
  # the percentile versus households above it
  indices <- seq(1, 99)
  n_columns <- ncol(pivot)
  list_of_databases <- lapply(indices, function(i) {
    data.frame(
      sumUpToCurrentPerc = rowSums(pivot[, 1:i], na.rm = TRUE),
      sumAboveCurrentPerc = rowSums(pivot[, (i + 1):n_columns], na.rm = TRUE)
    )
  })

  # Calculate SITI for every threshold
  SITI_results <- lapply(list_of_databases, function(group_counts) {
    spseg(x = Spatial_grid_cells_sp, data = group_counts, method = "information", smoothing = "kernel", sigma = 4000, useC = FALSE)
  })
  names(SITI_results) <- paste0("SITI_result", indices)

  # Keep only the index value (slot h) of every result, labelled by threshold
  combined_data_list <- lapply(indices, function(i) {
    data.frame(Value = SITI_results[[i]]@h, Group = as.character(i))
  })

  # Make them be in a single database
  combined_data <- do.call(rbind, combined_data_list)

  # Convert the Group column to a factor with custom levels
  combined_data$Group <- factor(combined_data$Group, levels = as.character(indices))

  # Save the data
  write.csv(combined_data, file = paste0("Wealth_per_capita_excluding_real_estate_segregation_4000m_2016_", City, ".csv"), row.names = FALSE)
}

# Delete data for enabling further operations
rm(wealth)


### Calculate Spatial Information Theory Index for wealth segregation in 2015 ###

# Load the data
wealth <- read.csv("raw_data/database_2015.csv") # only if not already loaded

# Delete missing values of wealth (99999999999 is the missing-value code)
wealth <- wealth[wealth$VEHW1000VERH != 99999999999, ]

# Calculate movable wealth
wealth$Movable_wealth <- wealth$VEHW1110FINH + wealth$VEHW1130ONDH +
  wealth$VEHW1140ABEH + wealth$VEHW1150OVEH

# Calculate movable wealth per capita
wealth$Movable_wealth_per_capita <- wealth$Movable_wealth / wealth$INHAHL

# Define all possible FUAs
cities <- c(
  "'s-Gravenhage", "'s-Hertogenbosch", "Alkmaar", "Almelo",
  "Alphen aan den Rijn", "Amersfoort", "Amsterdam", "Apeldoorn", "Arnhem",
  "Assen", "Bergen op Zoom", "Breda", "Deventer", "Ede", "Eindhoven",
  "Enschede", "Gouda", "Groningen", "Heerlen", "Leeuwarden", "Leiden",
  "Lelystad", "Maastricht", "Middelburg", "Nijmegen", "Oss", "Roosendaal",
  "Rotterdam", "Sittard-Geleen", "Soest", "Tilburg", "Utrecht", "Veenendaal",
  "Venlo", "Zwolle"
)

# Rank every household within its FUA, then clean the grid-cell codes:
#  1. order the rows by FUA and movable wealth;
#  2. assign each household to one of 100 equally sized wealth bins per FUA;
#  3. ungroup, so the FUA grouping does not leak into later select() calls;
#  4. drop rows whose grid cell is the "----------" placeholder;
#  5. append "00" to the easting and northing digits of the grid code
#     (i.e. multiply both by 100) -- presumably converting 100 m grid units
#     to metres, as the column name suggests; confirm against the codebook.
# NOTE(review): the percentile is ranked on household-level Movable_wealth,
# while the column name and the output file names say "per capita" and
# Movable_wealth_per_capita is otherwise unused -- confirm which variable
# is intended before changing it.
wealth <- wealth %>%
  arrange(FUA, Movable_wealth) %>%
  group_by(FUA) %>%
  mutate(Wealth_per_capita_percentile = ntile(Movable_wealth, 100)) %>%
  ungroup() %>%
  filter(VRLVIERKANT100M != "----------") %>%
  mutate(VRLVIERKANT100M = gsub("(E)(\\d+)(N)(\\d+)","\\1\\200\\3\\400", VRLVIERKANT100M))

# Loop over each city (sigma = 4000m)
# For every FUA: count households per grid cell and wealth percentile, split
# the population at each percentile threshold (1-99) into "at or below" versus
# "above", compute the spatial information theory index (spseg, method =
# "information") for every split and write the 99 values to one CSV per city.
for (city in cities) {
  # Set the variable city for the given city
  City <- city

  # Households of this FUA; ungroup() first so a leftover FUA grouping on
  # `wealth` does not drag the FUA column back in through select()
  df <- wealth %>%
    ungroup() %>%
    filter(FUA == City) %>%
    select(VRLVIERKANT100M, Wealth_per_capita_percentile)

  # A city without any located household cannot be processed; skip it
  # instead of aborting the whole batch
  if (nrow(df) == 0) {
    warning("No households with a grid cell for FUA '", City, "'; skipping.",
            call. = FALSE)
    next
  }

  # Pivot table: one row per grid cell, one column per percentile,
  # cells hold the number of households
  pivot <- df %>%
    count(VRLVIERKANT100M, Wealth_per_capita_percentile, name = "Population") %>%
    tidyr::pivot_wider(names_from = Wealth_per_capita_percentile, values_from = Population, values_fill = 0)

  # Parse the coordinates of the grid cells out of the grid code
  grid_cells <- data.frame(
    x = as.numeric(sub("E", "", str_extract(pivot$VRLVIERKANT100M, "E\\d+"))),
    y = as.numeric(sub("N", "", str_extract(pivot$VRLVIERKANT100M, "N\\d+")))
  )

  # Create a spatial object for grid cells
  Spatial_grid_cells <- st_as_sf(grid_cells, coords = c("x", "y"), crs = 28992)
  coords <- st_coordinates(Spatial_grid_cells)

  # Create SpatialPoints object
  Spatial_grid_cells_sp <- SpatialPoints(coords, proj4string = CRS("+init=epsg:28992"))

  # Delete the superfluous grid-code column from the wealth percentile counts
  pivot$VRLVIERKANT100M <- NULL

  # Percentiles that do not occur in this FUA get an all-zero column, so that
  # column i always corresponds to percentile i and thresholds 1-99 exist
  for (missing_percentile in setdiff(as.character(1:100), colnames(pivot))) {
    pivot[[missing_percentile]] <- 0L
  }

  # Order the columns numerically by percentile
  col_names <- as.numeric(colnames(pivot))
  pivot <- pivot[, order(col_names)]

  # For every threshold build the two-group table: households at or below
  # the percentile versus households above it
  indices <- seq(1, 99)
  n_columns <- ncol(pivot)
  list_of_databases <- lapply(indices, function(i) {
    data.frame(
      sumUpToCurrentPerc = rowSums(pivot[, 1:i], na.rm = TRUE),
      sumAboveCurrentPerc = rowSums(pivot[, (i + 1):n_columns], na.rm = TRUE)
    )
  })

  # Calculate SITI for every threshold
  SITI_results <- lapply(list_of_databases, function(group_counts) {
    spseg(x = Spatial_grid_cells_sp, data = group_counts, method = "information", smoothing = "kernel", sigma = 4000, useC = FALSE)
  })
  names(SITI_results) <- paste0("SITI_result", indices)

  # Keep only the index value (slot h) of every result, labelled by threshold
  combined_data_list <- lapply(indices, function(i) {
    data.frame(Value = SITI_results[[i]]@h, Group = as.character(i))
  })

  # Make them be in a single database
  combined_data <- do.call(rbind, combined_data_list)

  # Convert the Group column to a factor with custom levels
  combined_data$Group <- factor(combined_data$Group, levels = as.character(indices))

  # Save the data
  write.csv(combined_data, file = paste0("Wealth_per_capita_excluding_real_estate_segregation_4000m_2015_", City, ".csv"), row.names = FALSE)
}

# Delete data for enabling further operations
rm(wealth)



### Calculate Spatial Information Theory Index for wealth segregation in 2014 ###

# Load the data
wealth <- read.csv("raw_data/database_2014.csv") # only if not already loaded

# Delete missing values of wealth (99999999999 is the missing-value code)
wealth <- wealth[wealth$VEHW1000VERH != 99999999999, ]

# Calculate movable wealth
wealth$Movable_wealth <- wealth$VEHW1110FINH + wealth$VEHW1130ONDH +
  wealth$VEHW1140ABEH + wealth$VEHW1150OVEH

# Calculate movable wealth per capita
wealth$Movable_wealth_per_capita <- wealth$Movable_wealth / wealth$INHAHL

# Define all possible FUAs
cities <- c(
  "'s-Gravenhage", "'s-Hertogenbosch", "Alkmaar", "Almelo",
  "Alphen aan den Rijn", "Amersfoort", "Amsterdam", "Apeldoorn", "Arnhem",
  "Assen", "Bergen op Zoom", "Breda", "Deventer", "Ede", "Eindhoven",
  "Enschede", "Gouda", "Groningen", "Heerlen", "Leeuwarden", "Leiden",
  "Lelystad", "Maastricht", "Middelburg", "Nijmegen", "Oss", "Roosendaal",
  "Rotterdam", "Sittard-Geleen", "Soest", "Tilburg", "Utrecht", "Veenendaal",
  "Venlo", "Zwolle"
)

# Rank every household within its FUA, then clean the grid-cell codes:
#  1. order the rows by FUA and movable wealth;
#  2. assign each household to one of 100 equally sized wealth bins per FUA;
#  3. ungroup, so the FUA grouping does not leak into later select() calls;
#  4. drop rows whose grid cell is the "----------" placeholder;
#  5. append "00" to the easting and northing digits of the grid code
#     (i.e. multiply both by 100) -- presumably converting 100 m grid units
#     to metres, as the column name suggests; confirm against the codebook.
# NOTE(review): the percentile is ranked on household-level Movable_wealth,
# while the column name and the output file names say "per capita" and
# Movable_wealth_per_capita is otherwise unused -- confirm which variable
# is intended before changing it.
wealth <- wealth %>%
  arrange(FUA, Movable_wealth) %>%
  group_by(FUA) %>%
  mutate(Wealth_per_capita_percentile = ntile(Movable_wealth, 100)) %>%
  ungroup() %>%
  filter(VRLVIERKANT100M != "----------") %>%
  mutate(VRLVIERKANT100M = gsub("(E)(\\d+)(N)(\\d+)","\\1\\200\\3\\400", VRLVIERKANT100M))

# Loop over each city (sigma = 4000m)
# For every FUA: count households per grid cell and wealth percentile, split
# the population at each percentile threshold (1-99) into "at or below" versus
# "above", compute the spatial information theory index (spseg, method =
# "information") for every split and write the 99 values to one CSV per city.
for (city in cities) {
  # Set the variable city for the given city
  City <- city

  # Households of this FUA; ungroup() first so a leftover FUA grouping on
  # `wealth` does not drag the FUA column back in through select()
  df <- wealth %>%
    ungroup() %>%
    filter(FUA == City) %>%
    select(VRLVIERKANT100M, Wealth_per_capita_percentile)

  # A city without any located household cannot be processed; skip it
  # instead of aborting the whole batch
  if (nrow(df) == 0) {
    warning("No households with a grid cell for FUA '", City, "'; skipping.",
            call. = FALSE)
    next
  }

  # Pivot table: one row per grid cell, one column per percentile,
  # cells hold the number of households
  pivot <- df %>%
    count(VRLVIERKANT100M, Wealth_per_capita_percentile, name = "Population") %>%
    tidyr::pivot_wider(names_from = Wealth_per_capita_percentile, values_from = Population, values_fill = 0)

  # Parse the coordinates of the grid cells out of the grid code
  grid_cells <- data.frame(
    x = as.numeric(sub("E", "", str_extract(pivot$VRLVIERKANT100M, "E\\d+"))),
    y = as.numeric(sub("N", "", str_extract(pivot$VRLVIERKANT100M, "N\\d+")))
  )

  # Create a spatial object for grid cells
  Spatial_grid_cells <- st_as_sf(grid_cells, coords = c("x", "y"), crs = 28992)
  coords <- st_coordinates(Spatial_grid_cells)

  # Create SpatialPoints object
  Spatial_grid_cells_sp <- SpatialPoints(coords, proj4string = CRS("+init=epsg:28992"))

  # Delete the superfluous grid-code column from the wealth percentile counts
  pivot$VRLVIERKANT100M <- NULL

  # Percentiles that do not occur in this FUA get an all-zero column, so that
  # column i always corresponds to percentile i and thresholds 1-99 exist
  for (missing_percentile in setdiff(as.character(1:100), colnames(pivot))) {
    pivot[[missing_percentile]] <- 0L
  }

  # Order the columns numerically by percentile
  col_names <- as.numeric(colnames(pivot))
  pivot <- pivot[, order(col_names)]

  # For every threshold build the two-group table: households at or below
  # the percentile versus households above it
  indices <- seq(1, 99)
  n_columns <- ncol(pivot)
  list_of_databases <- lapply(indices, function(i) {
    data.frame(
      sumUpToCurrentPerc = rowSums(pivot[, 1:i], na.rm = TRUE),
      sumAboveCurrentPerc = rowSums(pivot[, (i + 1):n_columns], na.rm = TRUE)
    )
  })

  # Calculate SITI for every threshold
  SITI_results <- lapply(list_of_databases, function(group_counts) {
    spseg(x = Spatial_grid_cells_sp, data = group_counts, method = "information", smoothing = "kernel", sigma = 4000, useC = FALSE)
  })
  names(SITI_results) <- paste0("SITI_result", indices)

  # Keep only the index value (slot h) of every result, labelled by threshold
  combined_data_list <- lapply(indices, function(i) {
    data.frame(Value = SITI_results[[i]]@h, Group = as.character(i))
  })

  # Make them be in a single database
  combined_data <- do.call(rbind, combined_data_list)

  # Convert the Group column to a factor with custom levels
  combined_data$Group <- factor(combined_data$Group, levels = as.character(indices))

  # Save the data
  write.csv(combined_data, file = paste0("Wealth_per_capita_excluding_real_estate_segregation_4000m_2014_", City, ".csv"), row.names = FALSE)
}

# Delete data for enabling further operations
rm(wealth)




### Calculate Spatial Information Theory Index for wealth segregation in 2013 ###

# Load the data
wealth <- read.csv("raw_data/database_2013.csv") # only if not already loaded

# Delete missing values of wealth (99999999999 is the missing-value code)
wealth <- wealth[wealth$VEHW1000VERH != 99999999999, ]

# Calculate movable wealth
wealth$Movable_wealth <- wealth$VEHW1110FINH + wealth$VEHW1130ONDH +
  wealth$VEHW1140ABEH + wealth$VEHW1150OVEH

# Calculate movable wealth per capita
wealth$Movable_wealth_per_capita <- wealth$Movable_wealth / wealth$INHAHL

# Define all possible FUAs
cities <- c(
  "'s-Gravenhage", "'s-Hertogenbosch", "Alkmaar", "Almelo",
  "Alphen aan den Rijn", "Amersfoort", "Amsterdam", "Apeldoorn", "Arnhem",
  "Assen", "Bergen op Zoom", "Breda", "Deventer", "Ede", "Eindhoven",
  "Enschede", "Gouda", "Groningen", "Heerlen", "Leeuwarden", "Leiden",
  "Lelystad", "Maastricht", "Middelburg", "Nijmegen", "Oss", "Roosendaal",
  "Rotterdam", "Sittard-Geleen", "Soest", "Tilburg", "Utrecht", "Veenendaal",
  "Venlo", "Zwolle"
)

# Rank every household within its FUA, then clean the grid-cell codes:
#  1. order the rows by FUA and movable wealth;
#  2. assign each household to one of 100 equally sized wealth bins per FUA;
#  3. ungroup, so the FUA grouping does not leak into later select() calls;
#  4. drop rows whose grid cell is the "----------" placeholder;
#  5. append "00" to the easting and northing digits of the grid code
#     (i.e. multiply both by 100) -- presumably converting 100 m grid units
#     to metres, as the column name suggests; confirm against the codebook.
# NOTE(review): the percentile is ranked on household-level Movable_wealth,
# while the column name and the output file names say "per capita" and
# Movable_wealth_per_capita is otherwise unused -- confirm which variable
# is intended before changing it.
wealth <- wealth %>%
  arrange(FUA, Movable_wealth) %>%
  group_by(FUA) %>%
  mutate(Wealth_per_capita_percentile = ntile(Movable_wealth, 100)) %>%
  ungroup() %>%
  filter(VRLVIERKANT100M != "----------") %>%
  mutate(VRLVIERKANT100M = gsub("(E)(\\d+)(N)(\\d+)","\\1\\200\\3\\400", VRLVIERKANT100M))

# Loop over each city (sigma = 4000m)
# For every FUA: count households per grid cell and wealth percentile, split
# the population at each percentile threshold (1-99) into "at or below" versus
# "above", compute the spatial information theory index (spseg, method =
# "information") for every split and write the 99 values to one CSV per city.
for (city in cities) {
  # Set the variable city for the given city
  City <- city

  # Households of this FUA; ungroup() first so a leftover FUA grouping on
  # `wealth` does not drag the FUA column back in through select()
  df <- wealth %>%
    ungroup() %>%
    filter(FUA == City) %>%
    select(VRLVIERKANT100M, Wealth_per_capita_percentile)

  # A city without any located household cannot be processed; skip it
  # instead of aborting the whole batch
  if (nrow(df) == 0) {
    warning("No households with a grid cell for FUA '", City, "'; skipping.",
            call. = FALSE)
    next
  }

  # Pivot table: one row per grid cell, one column per percentile,
  # cells hold the number of households
  pivot <- df %>%
    count(VRLVIERKANT100M, Wealth_per_capita_percentile, name = "Population") %>%
    tidyr::pivot_wider(names_from = Wealth_per_capita_percentile, values_from = Population, values_fill = 0)

  # Parse the coordinates of the grid cells out of the grid code
  grid_cells <- data.frame(
    x = as.numeric(sub("E", "", str_extract(pivot$VRLVIERKANT100M, "E\\d+"))),
    y = as.numeric(sub("N", "", str_extract(pivot$VRLVIERKANT100M, "N\\d+")))
  )

  # Create a spatial object for grid cells
  Spatial_grid_cells <- st_as_sf(grid_cells, coords = c("x", "y"), crs = 28992)
  coords <- st_coordinates(Spatial_grid_cells)

  # Create SpatialPoints object
  Spatial_grid_cells_sp <- SpatialPoints(coords, proj4string = CRS("+init=epsg:28992"))

  # Delete the superfluous grid-code column from the wealth percentile counts
  pivot$VRLVIERKANT100M <- NULL

  # Percentiles that do not occur in this FUA get an all-zero column, so that
  # column i always corresponds to percentile i and thresholds 1-99 exist
  for (missing_percentile in setdiff(as.character(1:100), colnames(pivot))) {
    pivot[[missing_percentile]] <- 0L
  }

  # Order the columns numerically by percentile
  col_names <- as.numeric(colnames(pivot))
  pivot <- pivot[, order(col_names)]

  # For every threshold build the two-group table: households at or below
  # the percentile versus households above it
  indices <- seq(1, 99)
  n_columns <- ncol(pivot)
  list_of_databases <- lapply(indices, function(i) {
    data.frame(
      sumUpToCurrentPerc = rowSums(pivot[, 1:i], na.rm = TRUE),
      sumAboveCurrentPerc = rowSums(pivot[, (i + 1):n_columns], na.rm = TRUE)
    )
  })

  # Calculate SITI for every threshold
  SITI_results <- lapply(list_of_databases, function(group_counts) {
    spseg(x = Spatial_grid_cells_sp, data = group_counts, method = "information", smoothing = "kernel", sigma = 4000, useC = FALSE)
  })
  names(SITI_results) <- paste0("SITI_result", indices)

  # Keep only the index value (slot h) of every result, labelled by threshold
  combined_data_list <- lapply(indices, function(i) {
    data.frame(Value = SITI_results[[i]]@h, Group = as.character(i))
  })

  # Make them be in a single database
  combined_data <- do.call(rbind, combined_data_list)

  # Convert the Group column to a factor with custom levels
  combined_data$Group <- factor(combined_data$Group, levels = as.character(indices))

  # Save the data
  write.csv(combined_data, file = paste0("Wealth_per_capita_excluding_real_estate_segregation_4000m_2013_", City, ".csv"), row.names = FALSE)
}

# Delete data for enabling further operations
rm(wealth)








### Calculate Spatial Information Theory Index for wealth segregation in 2012 ###

# Load the data
wealth <- read.csv("raw_data/database_2012.csv") # only if not already loaded

# Delete missing values of wealth (99999999999 is the missing-value code)
wealth <- wealth[wealth$VEHW1000VERH != 99999999999, ]

# Calculate movable wealth
wealth$Movable_wealth <- wealth$VEHW1110FINH + wealth$VEHW1130ONDH +
  wealth$VEHW1140ABEH + wealth$VEHW1150OVEH

# Calculate movable wealth per capita
wealth$Movable_wealth_per_capita <- wealth$Movable_wealth / wealth$INHAHL

# Define all possible FUAs
cities <- c(
  "'s-Gravenhage", "'s-Hertogenbosch", "Alkmaar", "Almelo",
  "Alphen aan den Rijn", "Amersfoort", "Amsterdam", "Apeldoorn", "Arnhem",
  "Assen", "Bergen op Zoom", "Breda", "Deventer", "Ede", "Eindhoven",
  "Enschede", "Gouda", "Groningen", "Heerlen", "Leeuwarden", "Leiden",
  "Lelystad", "Maastricht", "Middelburg", "Nijmegen", "Oss", "Roosendaal",
  "Rotterdam", "Sittard-Geleen", "Soest", "Tilburg", "Utrecht", "Veenendaal",
  "Venlo", "Zwolle"
)

# Rank every household within its FUA, then clean the grid-cell codes:
#  1. order the rows by FUA and movable wealth;
#  2. assign each household to one of 100 equally sized wealth bins per FUA;
#  3. ungroup, so the FUA grouping does not leak into later select() calls;
#  4. drop rows whose grid cell is the "----------" placeholder;
#  5. append "00" to the easting and northing digits of the grid code
#     (i.e. multiply both by 100) -- presumably converting 100 m grid units
#     to metres, as the column name suggests; confirm against the codebook.
# NOTE(review): the percentile is ranked on household-level Movable_wealth,
# while the column name and the output file names say "per capita" and
# Movable_wealth_per_capita is otherwise unused -- confirm which variable
# is intended before changing it.
wealth <- wealth %>%
  arrange(FUA, Movable_wealth) %>%
  group_by(FUA) %>%
  mutate(Wealth_per_capita_percentile = ntile(Movable_wealth, 100)) %>%
  ungroup() %>%
  filter(VRLVIERKANT100M != "----------") %>%
  mutate(VRLVIERKANT100M = gsub("(E)(\\d+)(N)(\\d+)","\\1\\200\\3\\400", VRLVIERKANT100M))

# Loop over each city (sigma = 4000m)
# For every FUA: count households per grid cell and wealth percentile, split
# the population at each percentile threshold (1-99) into "at or below" versus
# "above", compute the spatial information theory index (spseg, method =
# "information") for every split and write the 99 values to one CSV per city.
for (city in cities) {
  # Set the variable city for the given city
  City <- city

  # Households of this FUA; ungroup() first so a leftover FUA grouping on
  # `wealth` does not drag the FUA column back in through select()
  df <- wealth %>%
    ungroup() %>%
    filter(FUA == City) %>%
    select(VRLVIERKANT100M, Wealth_per_capita_percentile)

  # A city without any located household cannot be processed; skip it
  # instead of aborting the whole batch
  if (nrow(df) == 0) {
    warning("No households with a grid cell for FUA '", City, "'; skipping.",
            call. = FALSE)
    next
  }

  # Pivot table: one row per grid cell, one column per percentile,
  # cells hold the number of households
  pivot <- df %>%
    count(VRLVIERKANT100M, Wealth_per_capita_percentile, name = "Population") %>%
    tidyr::pivot_wider(names_from = Wealth_per_capita_percentile, values_from = Population, values_fill = 0)

  # Parse the coordinates of the grid cells out of the grid code
  grid_cells <- data.frame(
    x = as.numeric(sub("E", "", str_extract(pivot$VRLVIERKANT100M, "E\\d+"))),
    y = as.numeric(sub("N", "", str_extract(pivot$VRLVIERKANT100M, "N\\d+")))
  )

  # Create a spatial object for grid cells
  Spatial_grid_cells <- st_as_sf(grid_cells, coords = c("x", "y"), crs = 28992)
  coords <- st_coordinates(Spatial_grid_cells)

  # Create SpatialPoints object
  Spatial_grid_cells_sp <- SpatialPoints(coords, proj4string = CRS("+init=epsg:28992"))

  # Delete the superfluous grid-code column from the wealth percentile counts
  pivot$VRLVIERKANT100M <- NULL

  # Percentiles that do not occur in this FUA get an all-zero column, so that
  # column i always corresponds to percentile i and thresholds 1-99 exist
  for (missing_percentile in setdiff(as.character(1:100), colnames(pivot))) {
    pivot[[missing_percentile]] <- 0L
  }

  # Order the columns numerically by percentile
  col_names <- as.numeric(colnames(pivot))
  pivot <- pivot[, order(col_names)]

  # For every threshold build the two-group table: households at or below
  # the percentile versus households above it
  indices <- seq(1, 99)
  n_columns <- ncol(pivot)
  list_of_databases <- lapply(indices, function(i) {
    data.frame(
      sumUpToCurrentPerc = rowSums(pivot[, 1:i], na.rm = TRUE),
      sumAboveCurrentPerc = rowSums(pivot[, (i + 1):n_columns], na.rm = TRUE)
    )
  })

  # Calculate SITI for every threshold
  SITI_results <- lapply(list_of_databases, function(group_counts) {
    spseg(x = Spatial_grid_cells_sp, data = group_counts, method = "information", smoothing = "kernel", sigma = 4000, useC = FALSE)
  })
  names(SITI_results) <- paste0("SITI_result", indices)

  # Keep only the index value (slot h) of every result, labelled by threshold
  combined_data_list <- lapply(indices, function(i) {
    data.frame(Value = SITI_results[[i]]@h, Group = as.character(i))
  })

  # Make them be in a single database
  combined_data <- do.call(rbind, combined_data_list)

  # Convert the Group column to a factor with custom levels
  combined_data$Group <- factor(combined_data$Group, levels = as.character(indices))

  # Save the data
  write.csv(combined_data, file = paste0("Wealth_per_capita_excluding_real_estate_segregation_4000m_2012_", City, ".csv"), row.names = FALSE)
}

# Delete data for enabling further operations
rm(wealth)


### Calculate Spatial Information Theory Index for wealth segregation in 2011 ###

# Load the data
wealth <- read.csv("raw_data/database_2011.csv") # only if not already loaded

# Delete missing values of wealth (99999999999 is the missing-value code)
wealth <- wealth[wealth$VEHW1000VERH != 99999999999, ]

# Calculate movable wealth
wealth$Movable_wealth <- wealth$VEHW1110FINH + wealth$VEHW1130ONDH +
  wealth$VEHW1140ABEH + wealth$VEHW1150OVEH

# Calculate movable wealth per capita
wealth$Movable_wealth_per_capita <- wealth$Movable_wealth / wealth$INHAHL

# Define all possible FUAs
cities <- c(
  "'s-Gravenhage", "'s-Hertogenbosch", "Alkmaar", "Almelo",
  "Alphen aan den Rijn", "Amersfoort", "Amsterdam", "Apeldoorn", "Arnhem",
  "Assen", "Bergen op Zoom", "Breda", "Deventer", "Ede", "Eindhoven",
  "Enschede", "Gouda", "Groningen", "Heerlen", "Leeuwarden", "Leiden",
  "Lelystad", "Maastricht", "Middelburg", "Nijmegen", "Oss", "Roosendaal",
  "Rotterdam", "Sittard-Geleen", "Soest", "Tilburg", "Utrecht", "Veenendaal",
  "Venlo", "Zwolle"
)

# Rank every household within its FUA, then clean the grid-cell codes:
#  1. order the rows by FUA and movable wealth;
#  2. assign each household to one of 100 equally sized wealth bins per FUA;
#  3. ungroup, so the FUA grouping does not leak into later select() calls;
#  4. drop rows whose grid cell is the "----------" placeholder;
#  5. append "00" to the easting and northing digits of the grid code
#     (i.e. multiply both by 100) -- presumably converting 100 m grid units
#     to metres, as the column name suggests; confirm against the codebook.
# NOTE(review): the percentile is ranked on household-level Movable_wealth,
# while the column name and the output file names say "per capita" and
# Movable_wealth_per_capita is otherwise unused -- confirm which variable
# is intended before changing it.
wealth <- wealth %>%
  arrange(FUA, Movable_wealth) %>%
  group_by(FUA) %>%
  mutate(Wealth_per_capita_percentile = ntile(Movable_wealth, 100)) %>%
  ungroup() %>%
  filter(VRLVIERKANT100M != "----------") %>%
  mutate(VRLVIERKANT100M = gsub("(E)(\\d+)(N)(\\d+)","\\1\\200\\3\\400", VRLVIERKANT100M))

# Loop over each city (sigma = 4000m)
#
# For every FUA: count households per grid cell and wealth percentile, split
# the population at each percentile threshold 1..99 into "up to" vs "above",
# compute the spatial information theory index of each split with spseg(), and
# write one CSV per city.
# Per-threshold inputs and results live in the lists list_of_databases and
# SITI_results; no per-index global variables are created.
n_percentiles <- 100 # must match the ntile() call that built the percentiles
indices <- seq_len(n_percentiles - 1)
percentile_cols <- as.character(seq_len(n_percentiles))

for (city in cities) {
  # Set the variable City for the given city
  City <- city

  # Households of this FUA that have a percentile. ungroup() makes the
  # selection independent of any grouping still attached to `wealth`;
  # households with an NA percentile would otherwise form an "NA" column that
  # is counted as "above" every threshold.
  df <- wealth %>%
    ungroup() %>%
    filter(FUA == City, !is.na(Wealth_per_capita_percentile)) %>%
    select(VRLVIERKANT100M, Wealth_per_capita_percentile)

  if (nrow(df) == 0) {
    warning("No households found for FUA '", City, "'; skipping.", call. = FALSE)
    next
  }

  # Pivot table: one row per grid cell, one column per percentile
  pivot <- df %>%
    count(VRLVIERKANT100M, Wealth_per_capita_percentile, name = "Population") %>%
    tidyr::pivot_wider(names_from = Wealth_per_capita_percentile, values_from = Population, values_fill = 0)

  # A percentile that never occurs in this city gets an all-zero column, so
  # that column i is always percentile i and thresholds 1..99 always exist
  for (absent_col in setdiff(percentile_cols, names(pivot))) {
    pivot[[absent_col]] <- 0L
  }

  # Grid-cell coordinates parsed from the "E<digits>N<digits>" cell code
  grid_cells <- data.frame(
    x = as.numeric(sub("E", "", str_extract(pivot$VRLVIERKANT100M, "E\\d+"))),
    y = as.numeric(sub("N", "", str_extract(pivot$VRLVIERKANT100M, "N\\d+")))
  )
  coords <- cbind(X = grid_cells$x, Y = grid_cells$y)

  # Create SpatialPoints object (rows stay aligned with the rows of `pivot`)
  Spatial_grid_cells_sp <- SpatialPoints(coords, proj4string = CRS("+init=epsg:28992"))

  # Household counts as a plain matrix with columns ordered 1..100
  count_matrix <- as.matrix(pivot[, percentile_cols])
  cell_totals <- rowSums(count_matrix)

  # For threshold i: households in percentiles 1..i versus i+1..100, per cell.
  # A running sum avoids re-adding the same columns for every threshold.
  list_of_databases <- vector("list", length(indices))
  households_up_to <- numeric(nrow(count_matrix))
  for (i in indices) {
    households_up_to <- households_up_to + count_matrix[, i]
    list_of_databases[[i]] <- data.frame(
      sumUpToCurrentPerc = households_up_to,
      sumAboveCurrentPerc = cell_totals - households_up_to
    )
  }

  # Calculate SITI for every threshold
  SITI_results <- lapply(list_of_databases, function(threshold_counts) {
    spseg(x = Spatial_grid_cells_sp, data = threshold_counts, method = "information", smoothing = "kernel", sigma = 4000, useC = FALSE)
  })
  names(SITI_results) <- paste0("SITI_result", indices)

  # One row per threshold: the index value (slot `h`) and the threshold label
  combined_data <- do.call(rbind, Map(function(siti, i) {
    data.frame(Value = siti@h, Group = as.character(i))
  }, SITI_results, indices))

  # Convert the Group column to a factor with custom levels
  combined_data$Group <- factor(combined_data$Group, levels = as.character(indices))

  # Save the data
  write.csv(combined_data, file = paste0("Wealth_per_capita_excluding_real_estate_segregation_4000m_2011_", City, ".csv"), row.names = FALSE)
}


# Delete data for enabling further operations
rm(wealth)



### Calculate Spatial Information Theory Index for wealth segregation in 2010 ###

# Load the 2010 household data
wealth <- read.csv("raw_data/database_2010.csv")

# Drop households whose total assets carry the missing-value code.
# filter() also discards rows where the comparison itself is NA; logical
# indexing with `[` would instead turn each of those into an all-NA row.
wealth <- wealth %>%
  filter(VEHW1000VERH != 99999999999)

# Movable wealth = financial assets + entrepreneurial capital + significant
# interests + other household assets (the asset components listed in the file
# header, leaving out real estate).
# The per-capita value divides by BVRAHL (presumably the number of household
# members -- TODO confirm).
wealth <- wealth %>%
  mutate(
    Movable_wealth = VEHW1110FINH + VEHW1130ONDH + VEHW1140ABEH + VEHW1150OVEH,
    Movable_wealth_per_capita = Movable_wealth / BVRAHL
  )

# Define all possible FUAs
cities <- c(
  "'s-Gravenhage", "'s-Hertogenbosch", "Alkmaar", "Almelo",
  "Alphen aan den Rijn", "Amersfoort", "Amsterdam", "Apeldoorn", "Arnhem",
  "Assen", "Bergen op Zoom", "Breda", "Deventer", "Ede", "Eindhoven",
  "Enschede", "Gouda", "Groningen", "Heerlen", "Leeuwarden", "Leiden",
  "Lelystad", "Maastricht", "Middelburg", "Nijmegen", "Oss", "Roosendaal",
  "Rotterdam", "Sittard-Geleen", "Soest", "Tilburg", "Utrecht", "Veenendaal",
  "Venlo", "Zwolle"
)

# Rank every household into one of 100 wealth groups within its own FUA.
# ungroup() afterwards so later steps do not silently inherit the FUA grouping.
# NOTE(review): the ranking uses household-level Movable_wealth, while the
# column name and the output file names say "per capita" and
# Movable_wealth_per_capita is not used in this section -- confirm which
# variable is intended before changing it (all years must stay consistent).
wealth <- wealth %>%
  arrange(FUA, Movable_wealth) %>%
  group_by(FUA) %>%
  mutate(Wealth_per_capita_percentile = ntile(Movable_wealth, 100)) %>%
  ungroup()

# Drop households without a grid-cell code, then append "00" to both the E and
# the N number of the code (the original note says this is to express the
# coordinates in metres). In an R replacement string only \1..\9 are
# backreferences, so "\\200" means group 2 followed by the literal "00".
wealth <- wealth %>%
  filter(VRLVIERKANT100M != "----------") %>%
  mutate(VRLVIERKANT100M = gsub("(E)(\\d+)(N)(\\d+)","\\1\\200\\3\\400", VRLVIERKANT100M))

# Loop over each city (sigma = 4000m)
#
# For every FUA: count households per grid cell and wealth percentile, split
# the population at each percentile threshold 1..99 into "up to" vs "above",
# compute the spatial information theory index of each split with spseg(), and
# write one CSV per city.
# Per-threshold inputs and results live in the lists list_of_databases and
# SITI_results; no per-index global variables are created.
n_percentiles <- 100 # must match the ntile() call that built the percentiles
indices <- seq_len(n_percentiles - 1)
percentile_cols <- as.character(seq_len(n_percentiles))

for (city in cities) {
  # Set the variable City for the given city
  City <- city

  # Households of this FUA that have a percentile. ungroup() makes the
  # selection independent of any grouping still attached to `wealth`;
  # households with an NA percentile would otherwise form an "NA" column that
  # is counted as "above" every threshold.
  df <- wealth %>%
    ungroup() %>%
    filter(FUA == City, !is.na(Wealth_per_capita_percentile)) %>%
    select(VRLVIERKANT100M, Wealth_per_capita_percentile)

  if (nrow(df) == 0) {
    warning("No households found for FUA '", City, "'; skipping.", call. = FALSE)
    next
  }

  # Pivot table: one row per grid cell, one column per percentile
  pivot <- df %>%
    count(VRLVIERKANT100M, Wealth_per_capita_percentile, name = "Population") %>%
    tidyr::pivot_wider(names_from = Wealth_per_capita_percentile, values_from = Population, values_fill = 0)

  # A percentile that never occurs in this city gets an all-zero column, so
  # that column i is always percentile i and thresholds 1..99 always exist
  for (absent_col in setdiff(percentile_cols, names(pivot))) {
    pivot[[absent_col]] <- 0L
  }

  # Grid-cell coordinates parsed from the "E<digits>N<digits>" cell code
  grid_cells <- data.frame(
    x = as.numeric(sub("E", "", str_extract(pivot$VRLVIERKANT100M, "E\\d+"))),
    y = as.numeric(sub("N", "", str_extract(pivot$VRLVIERKANT100M, "N\\d+")))
  )
  coords <- cbind(X = grid_cells$x, Y = grid_cells$y)

  # Create SpatialPoints object (rows stay aligned with the rows of `pivot`)
  Spatial_grid_cells_sp <- SpatialPoints(coords, proj4string = CRS("+init=epsg:28992"))

  # Household counts as a plain matrix with columns ordered 1..100
  count_matrix <- as.matrix(pivot[, percentile_cols])
  cell_totals <- rowSums(count_matrix)

  # For threshold i: households in percentiles 1..i versus i+1..100, per cell.
  # A running sum avoids re-adding the same columns for every threshold.
  list_of_databases <- vector("list", length(indices))
  households_up_to <- numeric(nrow(count_matrix))
  for (i in indices) {
    households_up_to <- households_up_to + count_matrix[, i]
    list_of_databases[[i]] <- data.frame(
      sumUpToCurrentPerc = households_up_to,
      sumAboveCurrentPerc = cell_totals - households_up_to
    )
  }

  # Calculate SITI for every threshold
  SITI_results <- lapply(list_of_databases, function(threshold_counts) {
    spseg(x = Spatial_grid_cells_sp, data = threshold_counts, method = "information", smoothing = "kernel", sigma = 4000, useC = FALSE)
  })
  names(SITI_results) <- paste0("SITI_result", indices)

  # One row per threshold: the index value (slot `h`) and the threshold label
  combined_data <- do.call(rbind, Map(function(siti, i) {
    data.frame(Value = siti@h, Group = as.character(i))
  }, SITI_results, indices))

  # Convert the Group column to a factor with custom levels
  combined_data$Group <- factor(combined_data$Group, levels = as.character(indices))

  # Save the data
  write.csv(combined_data, file = paste0("Wealth_per_capita_excluding_real_estate_segregation_4000m_2010_", City, ".csv"), row.names = FALSE)
}


# Delete data for enabling further operations
rm(wealth)




### Calculate Spatial Information Theory Index for wealth segregation in 2009 ###

# Load the 2009 household data
wealth <- read.csv("raw_data/database_2009.csv")

# Drop households whose total assets carry the missing-value code.
# filter() also discards rows where the comparison itself is NA; logical
# indexing with `[` would instead turn each of those into an all-NA row.
wealth <- wealth %>%
  filter(VEHW1000VERH != 99999999999)

# Movable wealth = financial assets + entrepreneurial capital + significant
# interests + other household assets (the asset components listed in the file
# header, leaving out real estate).
# The per-capita value divides by BVRAHL (presumably the number of household
# members -- TODO confirm).
wealth <- wealth %>%
  mutate(
    Movable_wealth = VEHW1110FINH + VEHW1130ONDH + VEHW1140ABEH + VEHW1150OVEH,
    Movable_wealth_per_capita = Movable_wealth / BVRAHL
  )

# Define all possible FUAs
cities <- c(
  "'s-Gravenhage", "'s-Hertogenbosch", "Alkmaar", "Almelo",
  "Alphen aan den Rijn", "Amersfoort", "Amsterdam", "Apeldoorn", "Arnhem",
  "Assen", "Bergen op Zoom", "Breda", "Deventer", "Ede", "Eindhoven",
  "Enschede", "Gouda", "Groningen", "Heerlen", "Leeuwarden", "Leiden",
  "Lelystad", "Maastricht", "Middelburg", "Nijmegen", "Oss", "Roosendaal",
  "Rotterdam", "Sittard-Geleen", "Soest", "Tilburg", "Utrecht", "Veenendaal",
  "Venlo", "Zwolle"
)

# Rank every household into one of 100 wealth groups within its own FUA.
# ungroup() afterwards so later steps do not silently inherit the FUA grouping.
# NOTE(review): the ranking uses household-level Movable_wealth, while the
# column name and the output file names say "per capita" and
# Movable_wealth_per_capita is not used in this section -- confirm which
# variable is intended before changing it (all years must stay consistent).
wealth <- wealth %>%
  arrange(FUA, Movable_wealth) %>%
  group_by(FUA) %>%
  mutate(Wealth_per_capita_percentile = ntile(Movable_wealth, 100)) %>%
  ungroup()

# Drop households without a grid-cell code, then append "00" to both the E and
# the N number of the code (the original note says this is to express the
# coordinates in metres). In an R replacement string only \1..\9 are
# backreferences, so "\\200" means group 2 followed by the literal "00".
wealth <- wealth %>%
  filter(VRLVIERKANT100M != "----------") %>%
  mutate(VRLVIERKANT100M = gsub("(E)(\\d+)(N)(\\d+)","\\1\\200\\3\\400", VRLVIERKANT100M))

# Loop over each city (sigma = 4000m)
#
# For every FUA: count households per grid cell and wealth percentile, split
# the population at each percentile threshold 1..99 into "up to" vs "above",
# compute the spatial information theory index of each split with spseg(), and
# write one CSV per city.
# Per-threshold inputs and results live in the lists list_of_databases and
# SITI_results; no per-index global variables are created.
n_percentiles <- 100 # must match the ntile() call that built the percentiles
indices <- seq_len(n_percentiles - 1)
percentile_cols <- as.character(seq_len(n_percentiles))

for (city in cities) {
  # Set the variable City for the given city
  City <- city

  # Households of this FUA that have a percentile. ungroup() makes the
  # selection independent of any grouping still attached to `wealth`;
  # households with an NA percentile would otherwise form an "NA" column that
  # is counted as "above" every threshold.
  df <- wealth %>%
    ungroup() %>%
    filter(FUA == City, !is.na(Wealth_per_capita_percentile)) %>%
    select(VRLVIERKANT100M, Wealth_per_capita_percentile)

  if (nrow(df) == 0) {
    warning("No households found for FUA '", City, "'; skipping.", call. = FALSE)
    next
  }

  # Pivot table: one row per grid cell, one column per percentile
  pivot <- df %>%
    count(VRLVIERKANT100M, Wealth_per_capita_percentile, name = "Population") %>%
    tidyr::pivot_wider(names_from = Wealth_per_capita_percentile, values_from = Population, values_fill = 0)

  # A percentile that never occurs in this city gets an all-zero column, so
  # that column i is always percentile i and thresholds 1..99 always exist
  for (absent_col in setdiff(percentile_cols, names(pivot))) {
    pivot[[absent_col]] <- 0L
  }

  # Grid-cell coordinates parsed from the "E<digits>N<digits>" cell code
  grid_cells <- data.frame(
    x = as.numeric(sub("E", "", str_extract(pivot$VRLVIERKANT100M, "E\\d+"))),
    y = as.numeric(sub("N", "", str_extract(pivot$VRLVIERKANT100M, "N\\d+")))
  )
  coords <- cbind(X = grid_cells$x, Y = grid_cells$y)

  # Create SpatialPoints object (rows stay aligned with the rows of `pivot`)
  Spatial_grid_cells_sp <- SpatialPoints(coords, proj4string = CRS("+init=epsg:28992"))

  # Household counts as a plain matrix with columns ordered 1..100
  count_matrix <- as.matrix(pivot[, percentile_cols])
  cell_totals <- rowSums(count_matrix)

  # For threshold i: households in percentiles 1..i versus i+1..100, per cell.
  # A running sum avoids re-adding the same columns for every threshold.
  list_of_databases <- vector("list", length(indices))
  households_up_to <- numeric(nrow(count_matrix))
  for (i in indices) {
    households_up_to <- households_up_to + count_matrix[, i]
    list_of_databases[[i]] <- data.frame(
      sumUpToCurrentPerc = households_up_to,
      sumAboveCurrentPerc = cell_totals - households_up_to
    )
  }

  # Calculate SITI for every threshold
  SITI_results <- lapply(list_of_databases, function(threshold_counts) {
    spseg(x = Spatial_grid_cells_sp, data = threshold_counts, method = "information", smoothing = "kernel", sigma = 4000, useC = FALSE)
  })
  names(SITI_results) <- paste0("SITI_result", indices)

  # One row per threshold: the index value (slot `h`) and the threshold label
  combined_data <- do.call(rbind, Map(function(siti, i) {
    data.frame(Value = siti@h, Group = as.character(i))
  }, SITI_results, indices))

  # Convert the Group column to a factor with custom levels
  combined_data$Group <- factor(combined_data$Group, levels = as.character(indices))

  # Save the data
  write.csv(combined_data, file = paste0("Wealth_per_capita_excluding_real_estate_segregation_4000m_2009_", City, ".csv"), row.names = FALSE)
}

# Delete data for enabling further operations
rm(wealth)




### Calculate Spatial Information Theory Index for wealth segregation in 2008 ###

# Load the 2008 household data
wealth <- read.csv("raw_data/database_2008.csv")

# Drop households whose total assets carry the missing-value code.
# filter() also discards rows where the comparison itself is NA; logical
# indexing with `[` would instead turn each of those into an all-NA row.
wealth <- wealth %>%
  filter(VEHW1000VERH != 99999999999)

# Movable wealth = financial assets + entrepreneurial capital + significant
# interests + other household assets (the asset components listed in the file
# header, leaving out real estate).
# The per-capita value divides by BVRAHL (presumably the number of household
# members -- TODO confirm).
wealth <- wealth %>%
  mutate(
    Movable_wealth = VEHW1110FINH + VEHW1130ONDH + VEHW1140ABEH + VEHW1150OVEH,
    Movable_wealth_per_capita = Movable_wealth / BVRAHL
  )

# Define all possible FUAs
cities <- c(
  "'s-Gravenhage", "'s-Hertogenbosch", "Alkmaar", "Almelo",
  "Alphen aan den Rijn", "Amersfoort", "Amsterdam", "Apeldoorn", "Arnhem",
  "Assen", "Bergen op Zoom", "Breda", "Deventer", "Ede", "Eindhoven",
  "Enschede", "Gouda", "Groningen", "Heerlen", "Leeuwarden", "Leiden",
  "Lelystad", "Maastricht", "Middelburg", "Nijmegen", "Oss", "Roosendaal",
  "Rotterdam", "Sittard-Geleen", "Soest", "Tilburg", "Utrecht", "Veenendaal",
  "Venlo", "Zwolle"
)

# Rank every household into one of 100 wealth groups within its own FUA.
# ungroup() afterwards so later steps do not silently inherit the FUA grouping.
# NOTE(review): the ranking uses household-level Movable_wealth, while the
# column name and the output file names say "per capita" and
# Movable_wealth_per_capita is not used in this section -- confirm which
# variable is intended before changing it (all years must stay consistent).
wealth <- wealth %>%
  arrange(FUA, Movable_wealth) %>%
  group_by(FUA) %>%
  mutate(Wealth_per_capita_percentile = ntile(Movable_wealth, 100)) %>%
  ungroup()

# Drop households without a grid-cell code, then append "00" to both the E and
# the N number of the code (the original note says this is to express the
# coordinates in metres). In an R replacement string only \1..\9 are
# backreferences, so "\\200" means group 2 followed by the literal "00".
wealth <- wealth %>%
  filter(VRLVIERKANT100M != "----------") %>%
  mutate(VRLVIERKANT100M = gsub("(E)(\\d+)(N)(\\d+)","\\1\\200\\3\\400", VRLVIERKANT100M))

# Loop over each city (sigma = 4000m)
#
# For every FUA: count households per grid cell and wealth percentile, split
# the population at each percentile threshold 1..99 into "up to" vs "above",
# compute the spatial information theory index of each split with spseg(), and
# write one CSV per city.
# Per-threshold inputs and results live in the lists list_of_databases and
# SITI_results; no per-index global variables are created.
n_percentiles <- 100 # must match the ntile() call that built the percentiles
indices <- seq_len(n_percentiles - 1)
percentile_cols <- as.character(seq_len(n_percentiles))

for (city in cities) {
  # Set the variable City for the given city
  City <- city

  # Households of this FUA that have a percentile. ungroup() makes the
  # selection independent of any grouping still attached to `wealth`;
  # households with an NA percentile would otherwise form an "NA" column that
  # is counted as "above" every threshold.
  df <- wealth %>%
    ungroup() %>%
    filter(FUA == City, !is.na(Wealth_per_capita_percentile)) %>%
    select(VRLVIERKANT100M, Wealth_per_capita_percentile)

  if (nrow(df) == 0) {
    warning("No households found for FUA '", City, "'; skipping.", call. = FALSE)
    next
  }

  # Pivot table: one row per grid cell, one column per percentile
  pivot <- df %>%
    count(VRLVIERKANT100M, Wealth_per_capita_percentile, name = "Population") %>%
    tidyr::pivot_wider(names_from = Wealth_per_capita_percentile, values_from = Population, values_fill = 0)

  # A percentile that never occurs in this city gets an all-zero column, so
  # that column i is always percentile i and thresholds 1..99 always exist
  for (absent_col in setdiff(percentile_cols, names(pivot))) {
    pivot[[absent_col]] <- 0L
  }

  # Grid-cell coordinates parsed from the "E<digits>N<digits>" cell code
  grid_cells <- data.frame(
    x = as.numeric(sub("E", "", str_extract(pivot$VRLVIERKANT100M, "E\\d+"))),
    y = as.numeric(sub("N", "", str_extract(pivot$VRLVIERKANT100M, "N\\d+")))
  )
  coords <- cbind(X = grid_cells$x, Y = grid_cells$y)

  # Create SpatialPoints object (rows stay aligned with the rows of `pivot`)
  Spatial_grid_cells_sp <- SpatialPoints(coords, proj4string = CRS("+init=epsg:28992"))

  # Household counts as a plain matrix with columns ordered 1..100
  count_matrix <- as.matrix(pivot[, percentile_cols])
  cell_totals <- rowSums(count_matrix)

  # For threshold i: households in percentiles 1..i versus i+1..100, per cell.
  # A running sum avoids re-adding the same columns for every threshold.
  list_of_databases <- vector("list", length(indices))
  households_up_to <- numeric(nrow(count_matrix))
  for (i in indices) {
    households_up_to <- households_up_to + count_matrix[, i]
    list_of_databases[[i]] <- data.frame(
      sumUpToCurrentPerc = households_up_to,
      sumAboveCurrentPerc = cell_totals - households_up_to
    )
  }

  # Calculate SITI for every threshold
  SITI_results <- lapply(list_of_databases, function(threshold_counts) {
    spseg(x = Spatial_grid_cells_sp, data = threshold_counts, method = "information", smoothing = "kernel", sigma = 4000, useC = FALSE)
  })
  names(SITI_results) <- paste0("SITI_result", indices)

  # One row per threshold: the index value (slot `h`) and the threshold label
  combined_data <- do.call(rbind, Map(function(siti, i) {
    data.frame(Value = siti@h, Group = as.character(i))
  }, SITI_results, indices))

  # Convert the Group column to a factor with custom levels
  combined_data$Group <- factor(combined_data$Group, levels = as.character(indices))

  # Save the data
  write.csv(combined_data, file = paste0("Wealth_per_capita_excluding_real_estate_segregation_4000m_2008_", City, ".csv"), row.names = FALSE)
}

# Delete data for enabling further operations
rm(wealth)




### Calculate Spatial Information Theory Index for wealth segregation in 2007 ###

# Load the 2007 household data
wealth <- read.csv("raw_data/database_2007.csv")

# Drop households whose total assets carry the missing-value code.
# filter() also discards rows where the comparison itself is NA; logical
# indexing with `[` would instead turn each of those into an all-NA row.
wealth <- wealth %>%
  filter(VEHW1000VERH != 99999999999)

# Movable wealth = financial assets + entrepreneurial capital + significant
# interests + other household assets (the asset components listed in the file
# header, leaving out real estate).
# The per-capita value divides by BVRAHL (presumably the number of household
# members -- TODO confirm).
wealth <- wealth %>%
  mutate(
    Movable_wealth = VEHW1110FINH + VEHW1130ONDH + VEHW1140ABEH + VEHW1150OVEH,
    Movable_wealth_per_capita = Movable_wealth / BVRAHL
  )

# Define all possible FUAs
cities <- c(
  "'s-Gravenhage", "'s-Hertogenbosch", "Alkmaar", "Almelo",
  "Alphen aan den Rijn", "Amersfoort", "Amsterdam", "Apeldoorn", "Arnhem",
  "Assen", "Bergen op Zoom", "Breda", "Deventer", "Ede", "Eindhoven",
  "Enschede", "Gouda", "Groningen", "Heerlen", "Leeuwarden", "Leiden",
  "Lelystad", "Maastricht", "Middelburg", "Nijmegen", "Oss", "Roosendaal",
  "Rotterdam", "Sittard-Geleen", "Soest", "Tilburg", "Utrecht", "Veenendaal",
  "Venlo", "Zwolle"
)

# Rank every household into one of 100 wealth groups within its own FUA.
# ungroup() afterwards so later steps do not silently inherit the FUA grouping.
# NOTE(review): the ranking uses household-level Movable_wealth, while the
# column name and the output file names say "per capita" and
# Movable_wealth_per_capita is not used in this section -- confirm which
# variable is intended before changing it (all years must stay consistent).
wealth <- wealth %>%
  arrange(FUA, Movable_wealth) %>%
  group_by(FUA) %>%
  mutate(Wealth_per_capita_percentile = ntile(Movable_wealth, 100)) %>%
  ungroup()

# Drop households without a grid-cell code, then append "00" to both the E and
# the N number of the code (the original note says this is to express the
# coordinates in metres). In an R replacement string only \1..\9 are
# backreferences, so "\\200" means group 2 followed by the literal "00".
wealth <- wealth %>%
  filter(VRLVIERKANT100M != "----------") %>%
  mutate(VRLVIERKANT100M = gsub("(E)(\\d+)(N)(\\d+)","\\1\\200\\3\\400", VRLVIERKANT100M))

# Loop over each city (sigma = 4000m)
#
# For every FUA: count households per grid cell and wealth percentile, split
# the population at each percentile threshold 1..99 into "up to" vs "above",
# compute the spatial information theory index of each split with spseg(), and
# write one CSV per city.
# Per-threshold inputs and results live in the lists list_of_databases and
# SITI_results; no per-index global variables are created.
n_percentiles <- 100 # must match the ntile() call that built the percentiles
indices <- seq_len(n_percentiles - 1)
percentile_cols <- as.character(seq_len(n_percentiles))

for (city in cities) {
  # Set the variable City for the given city
  City <- city

  # Households of this FUA that have a percentile. ungroup() makes the
  # selection independent of any grouping still attached to `wealth`;
  # households with an NA percentile would otherwise form an "NA" column that
  # is counted as "above" every threshold.
  df <- wealth %>%
    ungroup() %>%
    filter(FUA == City, !is.na(Wealth_per_capita_percentile)) %>%
    select(VRLVIERKANT100M, Wealth_per_capita_percentile)

  if (nrow(df) == 0) {
    warning("No households found for FUA '", City, "'; skipping.", call. = FALSE)
    next
  }

  # Pivot table: one row per grid cell, one column per percentile
  pivot <- df %>%
    count(VRLVIERKANT100M, Wealth_per_capita_percentile, name = "Population") %>%
    tidyr::pivot_wider(names_from = Wealth_per_capita_percentile, values_from = Population, values_fill = 0)

  # A percentile that never occurs in this city gets an all-zero column, so
  # that column i is always percentile i and thresholds 1..99 always exist
  for (absent_col in setdiff(percentile_cols, names(pivot))) {
    pivot[[absent_col]] <- 0L
  }

  # Grid-cell coordinates parsed from the "E<digits>N<digits>" cell code
  grid_cells <- data.frame(
    x = as.numeric(sub("E", "", str_extract(pivot$VRLVIERKANT100M, "E\\d+"))),
    y = as.numeric(sub("N", "", str_extract(pivot$VRLVIERKANT100M, "N\\d+")))
  )
  coords <- cbind(X = grid_cells$x, Y = grid_cells$y)

  # Create SpatialPoints object (rows stay aligned with the rows of `pivot`)
  Spatial_grid_cells_sp <- SpatialPoints(coords, proj4string = CRS("+init=epsg:28992"))

  # Household counts as a plain matrix with columns ordered 1..100
  count_matrix <- as.matrix(pivot[, percentile_cols])
  cell_totals <- rowSums(count_matrix)

  # For threshold i: households in percentiles 1..i versus i+1..100, per cell.
  # A running sum avoids re-adding the same columns for every threshold.
  list_of_databases <- vector("list", length(indices))
  households_up_to <- numeric(nrow(count_matrix))
  for (i in indices) {
    households_up_to <- households_up_to + count_matrix[, i]
    list_of_databases[[i]] <- data.frame(
      sumUpToCurrentPerc = households_up_to,
      sumAboveCurrentPerc = cell_totals - households_up_to
    )
  }

  # Calculate SITI for every threshold
  SITI_results <- lapply(list_of_databases, function(threshold_counts) {
    spseg(x = Spatial_grid_cells_sp, data = threshold_counts, method = "information", smoothing = "kernel", sigma = 4000, useC = FALSE)
  })
  names(SITI_results) <- paste0("SITI_result", indices)

  # One row per threshold: the index value (slot `h`) and the threshold label
  combined_data <- do.call(rbind, Map(function(siti, i) {
    data.frame(Value = siti@h, Group = as.character(i))
  }, SITI_results, indices))

  # Convert the Group column to a factor with custom levels
  combined_data$Group <- factor(combined_data$Group, levels = as.character(indices))

  # Save the data
  write.csv(combined_data, file = paste0("Wealth_per_capita_excluding_real_estate_segregation_4000m_2007_", City, ".csv"), row.names = FALSE)
}

# Delete data for enabling further operations
rm(wealth)



### Calculate Spatial Information Theory Index for wealth segregation in 2006 ###

# Load the 2006 household data
wealth <- read.csv("raw_data/database_2006.csv")

# Drop households whose total assets carry the missing-value code.
# filter() also discards rows where the comparison itself is NA; logical
# indexing with `[` would instead turn each of those into an all-NA row.
wealth <- wealth %>%
  filter(VEHW1000VERH != 99999999999)

# Movable wealth = financial assets + entrepreneurial capital + significant
# interests + other household assets (the asset components listed in the file
# header, leaving out real estate).
# The per-capita value divides by BVRAHL (presumably the number of household
# members -- TODO confirm).
wealth <- wealth %>%
  mutate(
    Movable_wealth = VEHW1110FINH + VEHW1130ONDH + VEHW1140ABEH + VEHW1150OVEH,
    Movable_wealth_per_capita = Movable_wealth / BVRAHL
  )

# Define all possible FUAs
cities <- c(
  "'s-Gravenhage", "'s-Hertogenbosch", "Alkmaar", "Almelo",
  "Alphen aan den Rijn", "Amersfoort", "Amsterdam", "Apeldoorn", "Arnhem",
  "Assen", "Bergen op Zoom", "Breda", "Deventer", "Ede", "Eindhoven",
  "Enschede", "Gouda", "Groningen", "Heerlen", "Leeuwarden", "Leiden",
  "Lelystad", "Maastricht", "Middelburg", "Nijmegen", "Oss", "Roosendaal",
  "Rotterdam", "Sittard-Geleen", "Soest", "Tilburg", "Utrecht", "Veenendaal",
  "Venlo", "Zwolle"
)

# Rank every household into one of 100 wealth groups within its own FUA.
# ungroup() afterwards so later steps do not silently inherit the FUA grouping.
# NOTE(review): the ranking uses household-level Movable_wealth, while the
# column name and the output file names say "per capita" and
# Movable_wealth_per_capita is not used in this section -- confirm which
# variable is intended before changing it (all years must stay consistent).
wealth <- wealth %>%
  arrange(FUA, Movable_wealth) %>%
  group_by(FUA) %>%
  mutate(Wealth_per_capita_percentile = ntile(Movable_wealth, 100)) %>%
  ungroup()

# Drop households without a grid-cell code, then append "00" to both the E and
# the N number of the code (the original note says this is to express the
# coordinates in metres). In an R replacement string only \1..\9 are
# backreferences, so "\\200" means group 2 followed by the literal "00".
wealth <- wealth %>%
  filter(VRLVIERKANT100M != "----------") %>%
  mutate(VRLVIERKANT100M = gsub("(E)(\\d+)(N)(\\d+)","\\1\\200\\3\\400", VRLVIERKANT100M))

# Loop over each city (sigma = 4000m)
#
# For every FUA: count households per grid cell and wealth percentile, split
# the population at each percentile threshold 1..99 into "up to" vs "above",
# compute the spatial information theory index of each split with spseg(), and
# write one CSV per city.
# Per-threshold inputs and results live in the lists list_of_databases and
# SITI_results; no per-index global variables are created.
n_percentiles <- 100 # must match the ntile() call that built the percentiles
indices <- seq_len(n_percentiles - 1)
percentile_cols <- as.character(seq_len(n_percentiles))

for (city in cities) {
  # Set the variable City for the given city
  City <- city

  # Households of this FUA that have a percentile. ungroup() makes the
  # selection independent of any grouping still attached to `wealth`;
  # households with an NA percentile would otherwise form an "NA" column that
  # is counted as "above" every threshold.
  df <- wealth %>%
    ungroup() %>%
    filter(FUA == City, !is.na(Wealth_per_capita_percentile)) %>%
    select(VRLVIERKANT100M, Wealth_per_capita_percentile)

  if (nrow(df) == 0) {
    warning("No households found for FUA '", City, "'; skipping.", call. = FALSE)
    next
  }

  # Pivot table: one row per grid cell, one column per percentile
  pivot <- df %>%
    count(VRLVIERKANT100M, Wealth_per_capita_percentile, name = "Population") %>%
    tidyr::pivot_wider(names_from = Wealth_per_capita_percentile, values_from = Population, values_fill = 0)

  # A percentile that never occurs in this city gets an all-zero column, so
  # that column i is always percentile i and thresholds 1..99 always exist
  for (absent_col in setdiff(percentile_cols, names(pivot))) {
    pivot[[absent_col]] <- 0L
  }

  # Grid-cell coordinates parsed from the "E<digits>N<digits>" cell code
  grid_cells <- data.frame(
    x = as.numeric(sub("E", "", str_extract(pivot$VRLVIERKANT100M, "E\\d+"))),
    y = as.numeric(sub("N", "", str_extract(pivot$VRLVIERKANT100M, "N\\d+")))
  )
  coords <- cbind(X = grid_cells$x, Y = grid_cells$y)

  # Create SpatialPoints object (rows stay aligned with the rows of `pivot`)
  Spatial_grid_cells_sp <- SpatialPoints(coords, proj4string = CRS("+init=epsg:28992"))

  # Household counts as a plain matrix with columns ordered 1..100
  count_matrix <- as.matrix(pivot[, percentile_cols])
  cell_totals <- rowSums(count_matrix)

  # For threshold i: households in percentiles 1..i versus i+1..100, per cell.
  # A running sum avoids re-adding the same columns for every threshold.
  list_of_databases <- vector("list", length(indices))
  households_up_to <- numeric(nrow(count_matrix))
  for (i in indices) {
    households_up_to <- households_up_to + count_matrix[, i]
    list_of_databases[[i]] <- data.frame(
      sumUpToCurrentPerc = households_up_to,
      sumAboveCurrentPerc = cell_totals - households_up_to
    )
  }

  # Calculate SITI for every threshold
  SITI_results <- lapply(list_of_databases, function(threshold_counts) {
    spseg(x = Spatial_grid_cells_sp, data = threshold_counts, method = "information", smoothing = "kernel", sigma = 4000, useC = FALSE)
  })
  names(SITI_results) <- paste0("SITI_result", indices)

  # One row per threshold: the index value (slot `h`) and the threshold label
  combined_data <- do.call(rbind, Map(function(siti, i) {
    data.frame(Value = siti@h, Group = as.character(i))
  }, SITI_results, indices))

  # Convert the Group column to a factor with custom levels
  combined_data$Group <- factor(combined_data$Group, levels = as.character(indices))

  # Save the data
  write.csv(combined_data, file = paste0("Wealth_per_capita_excluding_real_estate_segregation_4000m_2006_", City, ".csv"), row.names = FALSE)
}

# Delete data for enabling further operations
rm(wealth)

