##### Mapping segregation and getting data necessary for further analyses #####

#### Preparation #### ----------------------------------------------------------
# load the required packages
library(haven)
library(ineq)
library(DescTools)
library(tidyverse)
library(OasisR)
library(seg)
library(sf)
library(sp)

# Do not use scientific notation
options(scipen = 9999999)

# Load the data
data2022 <- read.csv("raw_data/database_2022.csv") # only if not already loaded

# Drop households whose income or wealth carries the missing-value code.
# which() also drops rows where the variable itself is NA; a plain logical
# subscript containing NA would instead return rows filled entirely with NA.
# NOTE(review): the income code has 10 digits and the wealth code 11 digits --
# confirm both against the codebook.
data2022 <- data2022[which(data2022$INHBESTINKH != 9999999999), ]
data2022 <- data2022[which(data2022$VEHW1000VERH != 99999999999), ]

# Wealth and income per capita in each household
# NOTE(review): assumes INHAHL (presumably the household size) is never 0 or
# NA; a 0 would produce Inf/NaN here -- verify.
data2022$Wealth_per_capita <- data2022$VEHW1000VERH / data2022$INHAHL
data2022$Income_per_capita <- data2022$INHBESTINKH / data2022$INHAHL


#### Calculate and get the data necessary for plotting the wealth-poorest 20% #### ----------------------------------------------------------

# Sort households by per-capita wealth within their FUA and assign each one a
# wealth percentile (1 = lowest, 100 = highest) relative to its own FUA.
# ungroup() afterwards so data2022 does not silently stay grouped by FUA.
data2022 <- data2022 %>%
  arrange(FUA, Wealth_per_capita) %>%
  group_by(FUA) %>%
  mutate(wealth_percentile = ntile(Wealth_per_capita, 100)) %>%
  ungroup()

# Number of households in each wealth percentile, largest first
category_counts <- data2022 %>%
  count(wealth_percentile, name = "count", sort = TRUE)

# Number of households per grid cell, FUA and wealth percentile
grid_cell_data <- data2022 %>%
  count(VRLVIERKANT500M, FUA, wealth_percentile, name = "n_households")

# One row per grid cell and FUA, one column per percentile
# (0 where a cell has no household in that percentile)
grid_cell_data_percentiles <- grid_cell_data %>%
  pivot_wider(names_from = wealth_percentile,
              values_from = n_households,
              names_prefix = "percentile_",
              values_fill = list(n_households = 0))

# Number of households in every percentile, summed over all grid cells
total_households_per_percentile <- colSums(grid_cell_data_percentiles[, paste0("percentile_", 1:100)])

# Total number of households per grid cell
grid_cell_data_percentiles <- grid_cell_data_percentiles %>%
  mutate(total_households = rowSums(select(., starts_with("percentile_"))))

# Households in percentiles 1-20 per grid cell. Counts below 10 are replaced
# by NA; the rows themselves are kept (total_households was already computed
# above, so it is not recomputed here).
grid_cell_data_poorest20_wealth <- grid_cell_data_percentiles %>%
  mutate(under_percentile_21 = rowSums(select(., matches("percentile_([1-9]$|1[0-9]$|20$)")))) %>%
  select(VRLVIERKANT500M, FUA, under_percentile_21, total_households) %>%
  mutate(under_percentile_21 = ifelse(under_percentile_21 < 10, NA, under_percentile_21),
         total_households = ifelse(total_households < 10, NA, total_households))

#### Calculate and get the data necessary for plotting the income-poorest 20% #### ----------------------------------------------------------

# Sort households by per-capita income within their FUA and assign each one an
# income percentile (1 = lowest, 100 = highest) relative to its own FUA.
# ungroup() afterwards so data2022 does not silently stay grouped by FUA.
data2022 <- data2022 %>%
  arrange(FUA, Income_per_capita) %>%
  group_by(FUA) %>%
  mutate(income_percentile = ntile(Income_per_capita, 100)) %>%
  ungroup()

# Number of households in each income percentile, largest first
category_counts <- data2022 %>%
  count(income_percentile, name = "count", sort = TRUE)

# Number of households per grid cell, FUA and income percentile
grid_cell_data <- data2022 %>%
  count(VRLVIERKANT500M, FUA, income_percentile, name = "n_households")

# One row per grid cell and FUA, one column per percentile
# (0 where a cell has no household in that percentile)
grid_cell_data_percentiles <- grid_cell_data %>%
  pivot_wider(names_from = income_percentile,
              values_from = n_households,
              names_prefix = "percentile_",
              values_fill = list(n_households = 0))

# Number of households in every percentile, summed over all grid cells
total_households_per_percentile <- colSums(grid_cell_data_percentiles[, paste0("percentile_", 1:100)])

# Total number of households per grid cell
grid_cell_data_percentiles <- grid_cell_data_percentiles %>%
  mutate(total_households = rowSums(select(., starts_with("percentile_"))))

# Households in percentiles 1-20 per grid cell. Counts below 10 are replaced
# by NA; the rows themselves are kept (total_households was already computed
# above, so it is not recomputed here).
grid_cell_data_poorest20_income <- grid_cell_data_percentiles %>%
  mutate(under_percentile_21 = rowSums(select(., matches("percentile_([1-9]$|1[0-9]$|20$)")))) %>%
  select(VRLVIERKANT500M, FUA, under_percentile_21, total_households) %>%
  mutate(under_percentile_21 = ifelse(under_percentile_21 < 10, NA, under_percentile_21),
         total_households = ifelse(total_households < 10, NA, total_households))

#### Merge the two databases regarding the poorest 20% #### ----------------------

# Give the count columns descriptive names: the new column is appended at the
# end and the generic one is removed
grid_cell_data_poorest20_wealth <- grid_cell_data_poorest20_wealth %>%
  mutate(Poorest_20_wealth = under_percentile_21,
         under_percentile_21 = NULL)
grid_cell_data_poorest20_income <- grid_cell_data_poorest20_income %>%
  mutate(Poorest_20_income = under_percentile_21,
         under_percentile_21 = NULL)

# Combine wealth and income counts per grid cell and FUA, then express both
# counts as a share of all households in the grid cell
grid_cell_data_poorest20 <- merge(grid_cell_data_poorest20_wealth,
                                  grid_cell_data_poorest20_income,
                                  by = c("VRLVIERKANT500M", "FUA", "total_households")) %>%
  mutate(Share__wealth_poorest_20 = Poorest_20_wealth / total_households,
         Share__income_poorest_20 = Poorest_20_income / total_households)


#### Calculate and get the data necessary for plotting the wealth-richest 20% #### ----------------------------------------------------------

# Sort households by per-capita wealth within their FUA and assign each one a
# wealth percentile (1 = lowest, 100 = highest) relative to its own FUA.
# ungroup() afterwards so data2022 does not silently stay grouped by FUA.
# NOTE(review): wealth_percentile is recomputed here after data2022 was
# re-sorted by income; ntile() appears to break ties by row position, so
# households with tied Wealth_per_capita may land in a different percentile
# than in the poorest-20% pass -- verify this is acceptable.
data2022 <- data2022 %>%
  arrange(FUA, Wealth_per_capita) %>%
  group_by(FUA) %>%
  mutate(wealth_percentile = ntile(Wealth_per_capita, 100)) %>%
  ungroup()

# Number of households in each wealth percentile, largest first
category_counts <- data2022 %>%
  count(wealth_percentile, name = "count", sort = TRUE)

# Number of households per grid cell, FUA and wealth percentile
grid_cell_data <- data2022 %>%
  count(VRLVIERKANT500M, FUA, wealth_percentile, name = "n_households")

# One row per grid cell and FUA, one column per percentile
# (0 where a cell has no household in that percentile)
grid_cell_data_percentiles <- grid_cell_data %>%
  pivot_wider(names_from = wealth_percentile,
              values_from = n_households,
              names_prefix = "percentile_",
              values_fill = list(n_households = 0))

# Number of households in every percentile, summed over all grid cells
total_households_per_percentile <- colSums(grid_cell_data_percentiles[, paste0("percentile_", 1:100)])

# Total number of households per grid cell
grid_cell_data_percentiles <- grid_cell_data_percentiles %>%
  mutate(total_households = rowSums(select(., starts_with("percentile_"))))

# Households in percentiles 81-100 per grid cell. Counts below 10 are replaced
# by NA; the rows themselves are kept (total_households was already computed
# above, so it is not recomputed here).
grid_cell_data_richest20_wealth <- grid_cell_data_percentiles %>%
  mutate(above_percentile_80 = rowSums(select(., matches("percentile_(8[1-9]$|9[0-9]$|100$)")))) %>%
  select(VRLVIERKANT500M, FUA, above_percentile_80, total_households) %>%
  mutate(above_percentile_80 = ifelse(above_percentile_80 < 10, NA, above_percentile_80),
         total_households = ifelse(total_households < 10, NA, total_households))

#### Calculate and get the data necessary for plotting the income-richest 20% #### ----------------------------------------------------------

# Sort households by per-capita income within their FUA and assign each one an
# income percentile (1 = lowest, 100 = highest) relative to its own FUA.
# ungroup() afterwards so data2022 does not silently stay grouped by FUA.
data2022 <- data2022 %>%
  arrange(FUA, Income_per_capita) %>%
  group_by(FUA) %>%
  mutate(income_percentile = ntile(Income_per_capita, 100)) %>%
  ungroup()

# Number of households in each income percentile, largest first
category_counts <- data2022 %>%
  count(income_percentile, name = "count", sort = TRUE)

# Number of households per grid cell, FUA and income percentile
grid_cell_data <- data2022 %>%
  count(VRLVIERKANT500M, FUA, income_percentile, name = "n_households")

# One row per grid cell and FUA, one column per percentile
# (0 where a cell has no household in that percentile)
grid_cell_data_percentiles <- grid_cell_data %>%
  pivot_wider(names_from = income_percentile,
              values_from = n_households,
              names_prefix = "percentile_",
              values_fill = list(n_households = 0))

# Number of households in every percentile, summed over all grid cells
total_households_per_percentile <- colSums(grid_cell_data_percentiles[, paste0("percentile_", 1:100)])

# Total number of households per grid cell
grid_cell_data_percentiles <- grid_cell_data_percentiles %>%
  mutate(total_households = rowSums(select(., starts_with("percentile_"))))

# Households in percentiles 81-100 (above percentile 80) per grid cell. Counts
# below 10 are replaced by NA; the rows themselves are kept (total_households
# was already computed above, so it is not recomputed here).
grid_cell_data_richest20_income <- grid_cell_data_percentiles %>%
  mutate(above_percentile_80 = rowSums(select(., matches("percentile_(8[1-9]$|9[0-9]$|100$)")))) %>%
  select(VRLVIERKANT500M, FUA, above_percentile_80, total_households) %>%
  mutate(above_percentile_80 = ifelse(above_percentile_80 < 10, NA, above_percentile_80),
         total_households = ifelse(total_households < 10, NA, total_households))


#### Merge the two databases regarding the richest 20% #### ----------------------

# Give the count columns descriptive names: the new column is appended at the
# end and the generic one is removed
grid_cell_data_richest20_wealth <- grid_cell_data_richest20_wealth %>%
  mutate(Richest_20_wealth = above_percentile_80,
         above_percentile_80 = NULL)
grid_cell_data_richest20_income <- grid_cell_data_richest20_income %>%
  mutate(Richest_20_income = above_percentile_80,
         above_percentile_80 = NULL)

# Combine wealth and income counts per grid cell and FUA, then express both
# counts as a share of all households in the grid cell
grid_cell_data_richest20 <- merge(grid_cell_data_richest20_wealth,
                                  grid_cell_data_richest20_income,
                                  by = c("VRLVIERKANT500M", "FUA", "total_households")) %>%
  mutate(Share__wealth_richest_20 = Richest_20_wealth / total_households,
         Share__income_richest_20 = Richest_20_income / total_households)

#### Merge all databases #### ---------------------------------------------------
# Poorest-20% and richest-20% figures side by side for every grid cell and FUA
grid_cell_data_map <- merge(grid_cell_data_poorest20,
                            grid_cell_data_richest20,
                            by = c("VRLVIERKANT500M", "FUA", "total_households"))

# Keep only the rows in which no column is NA
grid_cell_data_map_clean <- grid_cell_data_map[complete.cases(grid_cell_data_map), , drop = FALSE]






#### Repeat the process for smaller grid cells #### ----------------------------------------------------------

#### Calculate and get the data necessary for plotting the wealth-poorest 20% #### ----------------------------------------------------------

# Sort households by per-capita wealth within their FUA and assign each one a
# wealth percentile (1 = lowest, 100 = highest) relative to its own FUA.
# ungroup() afterwards so data2022 does not silently stay grouped by FUA.
data2022 <- data2022 %>%
  arrange(FUA, Wealth_per_capita) %>%
  group_by(FUA) %>%
  mutate(wealth_percentile = ntile(Wealth_per_capita, 100)) %>%
  ungroup()

# Number of households in each wealth percentile, largest first
category_counts <- data2022 %>%
  count(wealth_percentile, name = "count", sort = TRUE)

# Number of households per 100 m grid cell, FUA and wealth percentile
grid_cell_data <- data2022 %>%
  count(VRLVIERKANT100M, FUA, wealth_percentile, name = "n_households")

# One row per grid cell and FUA, one column per percentile
# (0 where a cell has no household in that percentile)
grid_cell_data_percentiles <- grid_cell_data %>%
  pivot_wider(names_from = wealth_percentile,
              values_from = n_households,
              names_prefix = "percentile_",
              values_fill = list(n_households = 0))

# Number of households in every percentile, summed over all grid cells
total_households_per_percentile <- colSums(grid_cell_data_percentiles[, paste0("percentile_", 1:100)])

# Total number of households per grid cell
grid_cell_data_percentiles <- grid_cell_data_percentiles %>%
  mutate(total_households = rowSums(select(., starts_with("percentile_"))))

# Households in percentiles 1-20 per grid cell. Counts below 10 are replaced
# by NA; the rows themselves are kept (total_households was already computed
# above, so it is not recomputed here).
grid_cell_data_poorest20_wealth <- grid_cell_data_percentiles %>%
  mutate(under_percentile_21 = rowSums(select(., matches("percentile_([1-9]$|1[0-9]$|20$)")))) %>%
  select(VRLVIERKANT100M, FUA, under_percentile_21, total_households) %>%
  mutate(under_percentile_21 = ifelse(under_percentile_21 < 10, NA, under_percentile_21),
         total_households = ifelse(total_households < 10, NA, total_households))

#### Calculate and get the data necessary for plotting the income-poorest 20% #### ----------------------------------------------------------

# Sort households by per-capita income within their FUA and assign each one an
# income percentile (1 = lowest, 100 = highest) relative to its own FUA.
# ungroup() afterwards so data2022 does not silently stay grouped by FUA.
data2022 <- data2022 %>%
  arrange(FUA, Income_per_capita) %>%
  group_by(FUA) %>%
  mutate(income_percentile = ntile(Income_per_capita, 100)) %>%
  ungroup()

# Number of households in each income percentile, largest first
category_counts <- data2022 %>%
  count(income_percentile, name = "count", sort = TRUE)

# Number of households per 100 m grid cell, FUA and income percentile
grid_cell_data <- data2022 %>%
  count(VRLVIERKANT100M, FUA, income_percentile, name = "n_households")

# One row per grid cell and FUA, one column per percentile
# (0 where a cell has no household in that percentile)
grid_cell_data_percentiles <- grid_cell_data %>%
  pivot_wider(names_from = income_percentile,
              values_from = n_households,
              names_prefix = "percentile_",
              values_fill = list(n_households = 0))

# Number of households in every percentile, summed over all grid cells
total_households_per_percentile <- colSums(grid_cell_data_percentiles[, paste0("percentile_", 1:100)])

# Total number of households per grid cell
grid_cell_data_percentiles <- grid_cell_data_percentiles %>%
  mutate(total_households = rowSums(select(., starts_with("percentile_"))))

# Households in percentiles 1-20 per grid cell. Counts below 10 are replaced
# by NA; the rows themselves are kept (total_households was already computed
# above, so it is not recomputed here).
grid_cell_data_poorest20_income <- grid_cell_data_percentiles %>%
  mutate(under_percentile_21 = rowSums(select(., matches("percentile_([1-9]$|1[0-9]$|20$)")))) %>%
  select(VRLVIERKANT100M, FUA, under_percentile_21, total_households) %>%
  mutate(under_percentile_21 = ifelse(under_percentile_21 < 10, NA, under_percentile_21),
         total_households = ifelse(total_households < 10, NA, total_households))

#### Merge the two databases regarding the poorest 20% #### ----------------------

# Give the count columns descriptive names: the new column is appended at the
# end and the generic one is removed
grid_cell_data_poorest20_wealth <- grid_cell_data_poorest20_wealth %>%
  mutate(Poorest_20_wealth = under_percentile_21,
         under_percentile_21 = NULL)
grid_cell_data_poorest20_income <- grid_cell_data_poorest20_income %>%
  mutate(Poorest_20_income = under_percentile_21,
         under_percentile_21 = NULL)

# Combine wealth and income counts per 100 m grid cell and FUA, then express
# both counts as a share of all households in the grid cell
grid_cell_data_poorest20 <- merge(grid_cell_data_poorest20_wealth,
                                  grid_cell_data_poorest20_income,
                                  by = c("VRLVIERKANT100M", "FUA", "total_households")) %>%
  mutate(Share__wealth_poorest_20 = Poorest_20_wealth / total_households,
         Share__income_poorest_20 = Poorest_20_income / total_households)


#### Calculate and get the data necessary for plotting the wealth-richest 20% #### ----------------------------------------------------------

# Sort households by per-capita wealth within their FUA and assign each one a
# wealth percentile (1 = lowest, 100 = highest) relative to its own FUA.
# ungroup() afterwards so data2022 does not silently stay grouped by FUA.
data2022 <- data2022 %>%
  arrange(FUA, Wealth_per_capita) %>%
  group_by(FUA) %>%
  mutate(wealth_percentile = ntile(Wealth_per_capita, 100)) %>%
  ungroup()

# Number of households in each wealth percentile, largest first
category_counts <- data2022 %>%
  count(wealth_percentile, name = "count", sort = TRUE)

# Number of households per 100 m grid cell, FUA and wealth percentile
grid_cell_data <- data2022 %>%
  count(VRLVIERKANT100M, FUA, wealth_percentile, name = "n_households")

# One row per grid cell and FUA, one column per percentile
# (0 where a cell has no household in that percentile)
grid_cell_data_percentiles <- grid_cell_data %>%
  pivot_wider(names_from = wealth_percentile,
              values_from = n_households,
              names_prefix = "percentile_",
              values_fill = list(n_households = 0))

# Number of households in every percentile, summed over all grid cells
total_households_per_percentile <- colSums(grid_cell_data_percentiles[, paste0("percentile_", 1:100)])

# Total number of households per grid cell
grid_cell_data_percentiles <- grid_cell_data_percentiles %>%
  mutate(total_households = rowSums(select(., starts_with("percentile_"))))

# Households in percentiles 81-100 per grid cell. Counts below 10 are replaced
# by NA; the rows themselves are kept (total_households was already computed
# above, so it is not recomputed here).
grid_cell_data_richest20_wealth <- grid_cell_data_percentiles %>%
  mutate(above_percentile_80 = rowSums(select(., matches("percentile_(8[1-9]$|9[0-9]$|100$)")))) %>%
  select(VRLVIERKANT100M, FUA, above_percentile_80, total_households) %>%
  mutate(above_percentile_80 = ifelse(above_percentile_80 < 10, NA, above_percentile_80),
         total_households = ifelse(total_households < 10, NA, total_households))

#### Calculate and get the data necessary for plotting the income-richest 20% #### ----------------------------------------------------------

# Sort households by per-capita income within their FUA and assign each one an
# income percentile (1 = lowest, 100 = highest) relative to its own FUA.
# ungroup() afterwards so data2022 does not silently stay grouped by FUA.
data2022 <- data2022 %>%
  arrange(FUA, Income_per_capita) %>%
  group_by(FUA) %>%
  mutate(income_percentile = ntile(Income_per_capita, 100)) %>%
  ungroup()

# Number of households in each income percentile, largest first
category_counts <- data2022 %>%
  count(income_percentile, name = "count", sort = TRUE)

# Number of households per 100 m grid cell, FUA and income percentile
grid_cell_data <- data2022 %>%
  count(VRLVIERKANT100M, FUA, income_percentile, name = "n_households")

# One row per grid cell and FUA, one column per percentile
# (0 where a cell has no household in that percentile)
grid_cell_data_percentiles <- grid_cell_data %>%
  pivot_wider(names_from = income_percentile,
              values_from = n_households,
              names_prefix = "percentile_",
              values_fill = list(n_households = 0))

# Number of households in every percentile, summed over all grid cells
total_households_per_percentile <- colSums(grid_cell_data_percentiles[, paste0("percentile_", 1:100)])

# Total number of households per grid cell
grid_cell_data_percentiles <- grid_cell_data_percentiles %>%
  mutate(total_households = rowSums(select(., starts_with("percentile_"))))

# Households in percentiles 81-100 (above percentile 80) per grid cell. Counts
# below 10 are replaced by NA; the rows themselves are kept (total_households
# was already computed above, so it is not recomputed here).
grid_cell_data_richest20_income <- grid_cell_data_percentiles %>%
  mutate(above_percentile_80 = rowSums(select(., matches("percentile_(8[1-9]$|9[0-9]$|100$)")))) %>%
  select(VRLVIERKANT100M, FUA, above_percentile_80, total_households) %>%
  mutate(above_percentile_80 = ifelse(above_percentile_80 < 10, NA, above_percentile_80),
         total_households = ifelse(total_households < 10, NA, total_households))


#### Merge the two databases regarding the richest 20% #### ----------------------

# Give the count columns descriptive names: the new column is appended at the
# end and the generic one is removed
grid_cell_data_richest20_wealth <- grid_cell_data_richest20_wealth %>%
  mutate(Richest_20_wealth = above_percentile_80,
         above_percentile_80 = NULL)
grid_cell_data_richest20_income <- grid_cell_data_richest20_income %>%
  mutate(Richest_20_income = above_percentile_80,
         above_percentile_80 = NULL)

# Combine wealth and income counts per 100 m grid cell and FUA, then express
# both counts as a share of all households in the grid cell
grid_cell_data_richest20 <- merge(grid_cell_data_richest20_wealth,
                                  grid_cell_data_richest20_income,
                                  by = c("VRLVIERKANT100M", "FUA", "total_households")) %>%
  mutate(Share__wealth_richest_20 = Richest_20_wealth / total_households,
         Share__income_richest_20 = Richest_20_income / total_households)

#### Merge all databases #### ---------------------------------------------------
# Poorest-20% and richest-20% figures side by side for every grid cell and FUA
grid_cell_data_map_100m <- merge(grid_cell_data_poorest20,
                                 grid_cell_data_richest20,
                                 by = c("VRLVIERKANT100M", "FUA", "total_households"))

# Keep only the rows in which no column is NA
grid_cell_data_map_clean_100m <- grid_cell_data_map_100m[complete.cases(grid_cell_data_map_100m), , drop = FALSE]


#### Get summary values for every grid cell #### --------------------------------

# Work on a copy of the household data in which per-capita wealth and income
# are floored at 1: every value below 1 (negative, zero or fractional) becomes
# 1. On top of the floored values, add the household-level wealth-to-income
# ratio and the natural logarithm of wealth.
data_ratio <- data2022 %>%
  mutate(Wealth_per_capita = pmax(Wealth_per_capita, 1),
         Income_per_capita = pmax(Income_per_capita, 1),
         Ratio_wealth_income = Wealth_per_capita / Income_per_capita,
         Log_wealth_per_capita = log(Wealth_per_capita))

# Summary statistics per 100 m grid cell, followed by ratios built from the
# cell-level means and medians; only cells with more than 9 households are kept
grid_cell_summary_100m <- data_ratio %>%
  group_by(VRLVIERKANT100M) %>%
  summarise(
    number_of_households = n(),
    mean_income = mean(Income_per_capita, na.rm = TRUE),
    median_income = median(Income_per_capita, na.rm = TRUE),
    sd_income = sd(Income_per_capita, na.rm = TRUE),
    mean_wealth = mean(Wealth_per_capita, na.rm = TRUE),
    median_wealth = median(Wealth_per_capita, na.rm = TRUE),
    mean_log_wealth = mean(Log_wealth_per_capita, na.rm = TRUE),
    median_log_wealth = median(Log_wealth_per_capita, na.rm = TRUE),
    sd_wealth = sd(Wealth_per_capita, na.rm = TRUE),
    sd_log_wealth = sd(Log_wealth_per_capita, na.rm = TRUE),
    mean_ratio_wealth_income = mean(Ratio_wealth_income, na.rm = TRUE),
    median_ratio_wealth_income = median(Ratio_wealth_income, na.rm = TRUE)
  ) %>%
  mutate(
    ratio_wealth_income_grid_cell = mean_wealth / mean_income,
    ratio_log_wealth_income_grid_cell = mean_log_wealth / mean_income,
    ratio_median_wealth_income_grid_cell = median_wealth / median_income,
    ratio_median_log_wealth_income_grid_cell = median_log_wealth / median_income
  ) %>%
  filter(number_of_households > 9)

#### Repeat the process for 500 m x 500 m grid cells #### -----------------------
# Summary statistics per 500 m grid cell computed from data_ratio, followed by
# ratios built from the cell-level means and medians; only cells with more
# than 9 households are kept
grid_cell_summary_500m <- data_ratio %>%
  group_by(VRLVIERKANT500M) %>%
  summarise(
    number_of_households = n(),
    mean_income = mean(Income_per_capita, na.rm = TRUE),
    median_income = median(Income_per_capita, na.rm = TRUE),
    sd_income = sd(Income_per_capita, na.rm = TRUE),
    mean_wealth = mean(Wealth_per_capita, na.rm = TRUE),
    median_wealth = median(Wealth_per_capita, na.rm = TRUE),
    mean_log_wealth = mean(Log_wealth_per_capita, na.rm = TRUE),
    median_log_wealth = median(Log_wealth_per_capita, na.rm = TRUE),
    sd_wealth = sd(Wealth_per_capita, na.rm = TRUE),
    sd_log_wealth = sd(Log_wealth_per_capita, na.rm = TRUE),
    mean_ratio_wealth_income = mean(Ratio_wealth_income, na.rm = TRUE),
    median_ratio_wealth_income = median(Ratio_wealth_income, na.rm = TRUE)
  ) %>%
  mutate(
    ratio_wealth_income_grid_cell = mean_wealth / mean_income,
    ratio_log_wealth_income_grid_cell = mean_log_wealth / mean_income,
    ratio_median_wealth_income_grid_cell = median_wealth / median_income,
    ratio_median_log_wealth_income_grid_cell = median_log_wealth / median_income
  ) %>%
  filter(number_of_households > 9)
