##### Grouping together all info #####
# 04_output_results script
# 26/08/2025
#####

# Load packages
library(DescTools)
library(tidyverse)
library(writexl)
library(purrr)
library(readxl)

# Set the directory containing the data
data_dir <- "processed_data"

# Read one segregation result file and tag every row with the year and the FUA
# encoded in its file name.
#
# Arguments:
#   file_path  Path to a CSV named "<prefix>_<4-digit year>_<FUA>.csv".
#   pattern    Regular expression applied to the base name of file_path.
#              Capture group 1 must be the year, capture group 2 the FUA name.
#              The default fits every
#              "..._segregation_<radius>m_<year>_<FUA>.csv" name loaded below.
# Returns the contents of the file plus the columns Year (integer) and FUA.
load_and_add_columns <- function(
    file_path,
    pattern = "_segregation_\\d+m_(\\d{4})_(.+)\\.csv$") {
  file_name <- basename(file_path)
  name_parts <- regmatches(file_name, regexec(pattern, file_name))[[1]]

  # regexec() yields no groups when the name does not fit the pattern; stop
  # instead of silently filling Year and FUA with NA.
  if (length(name_parts) < 3) {
    stop("Cannot extract year and FUA from file name: ", file_name,
         call. = FALSE)
  }
  year <- name_parts[2]
  city <- name_parts[3]

  # .env$ forces the values parsed from the file name to be used even if the
  # file itself happens to contain a column called `year` or `city`.
  read_csv(file_path) %>%
    mutate(Year = as.integer(.env$year),
           FUA = .env$city)
}

# Load every file of one segregation measure and stack the rows.
#
# Arguments:
#   file_prefix     File-name stem up to and including the radius, e.g.
#                   "Wealth_segregation_500m". Matching is case-sensitive. The
#                   stem is pasted into a regular expression as is, so it must
#                   not contain regex metacharacters.
#   variable_label  Value stored in the Variable column of every row.
#   directory       Directory that is searched (not recursively).
# Returns one table holding all matching files, with the columns added by
# load_and_add_columns() plus Variable.
load_segregation_data <- function(file_prefix, variable_label,
                                  directory = data_dir) {
  # The end is anchored so that stray copies such as "x.csv.bak" are ignored;
  # the start is left unanchored, as it was before.
  file_pattern <- paste0(file_prefix, "_(\\d{4})_(.+)\\.csv$")
  files <- list.files(directory, pattern = file_pattern, full.names = TRUE)

  # Without this check a wrong prefix or directory only shows up much later,
  # as an empty table.
  if (length(files) == 0) {
    warning("No files matching '", file_pattern, "' found in '", directory,
            "'", call. = FALSE)
  }

  combined <- files %>%
    lapply(load_and_add_columns, pattern = file_pattern) %>%
    bind_rows()

  # Add a column specifying which segregation variable it is
  combined$Variable <- variable_label
  combined
}

#### Wealth and income, 500m local environments ####
all_data_wealth <- load_segregation_data(
  "Wealth_segregation_500m", "Wealth_500m"
)
all_data_income <- load_segregation_data(
  "income_segregation_500m", "Income_500m"
)
all_data_wealth_per_capita <- load_segregation_data(
  "Wealth_per_capita_segregation_500m", "Wealth_per_capita_500m"
)
all_data_income_per_capita <- load_segregation_data(
  "income_per_capita_segregation_500m", "Income_per_capita_500m"
)

#### Wealth and income, 4000m local environments ####
all_data_wealth_4000 <- load_segregation_data(
  "Wealth_segregation_4000m", "Wealth_4000m"
)
all_data_income_4000 <- load_segregation_data(
  "income_segregation_4000m", "Income_4000m"
)
all_data_wealth_4000_per_capita <- load_segregation_data(
  "Wealth_per_capita_segregation_4000m", "Wealth_per_capita_4000m"
)
all_data_income_4000_per_capita <- load_segregation_data(
  "income_per_capita_segregation_4000m", "Income_per_capita_4000m"
)

#### Adjusted income ####
all_data_income_adjusted_500 <- load_segregation_data(
  "income_adjusted_segregation_500m", "Income_adjusted_500m"
)
all_data_income_adjusted_4000 <- load_segregation_data(
  "income_adjusted_segregation_4000m", "Income_adjusted_4000m"
)

#### Real estate wealth per capita ####
all_data_real_estate_500 <- load_segregation_data(
  "Wealth_per_capita_real_estate_segregation_500m", "Real_estate_500m"
)
all_data_real_estate_4000 <- load_segregation_data(
  "Wealth_per_capita_real_estate_segregation_4000m", "Real_estate_4000m"
)

#### Movable wealth (wealth excluding real estate) per capita ####
all_data_movable_wealth_500 <- load_segregation_data(
  "Wealth_per_capita_excluding_real_estate_segregation_500m",
  "Movable_wealth_500m"
)
all_data_movable_wealth_4000 <- load_segregation_data(
  "Wealth_per_capita_excluding_real_estate_segregation_4000m",
  "Movable_wealth_4000m"
)

#### Per adult capita data ####

# Overall wealth per adult capita
all_data_wealth_per_adult_capita_500 <- load_segregation_data(
  "Wealth_per_adult_capita_segregation_500m", "Wealth_per_adult_capita_500m"
)
all_data_wealth_per_adult_capita_4000 <- load_segregation_data(
  "Wealth_per_adult_capita_segregation_4000m", "Wealth_per_adult_capita_4000m"
)

# Income per adult capita
all_data_income_per_adult_capita_500 <- load_segregation_data(
  "Income_per_adult_capita_segregation_500m", "Income_per_adult_capita_500m"
)
all_data_income_per_adult_capita_4000 <- load_segregation_data(
  "Income_per_adult_capita_segregation_4000m", "Income_per_adult_capita_4000m"
)

# Real estate wealth per adult capita
all_data_real_estate_per_adult_capita_500 <- load_segregation_data(
  "Real_estate_wealth_per_adult_capita_segregation_500m",
  "Real_estate_per_adult_capita_500m"
)
all_data_real_estate_per_adult_capita_4000 <- load_segregation_data(
  "Real_estate_wealth_per_adult_capita_segregation_4000m",
  "Real_estate_per_adult_capita_4000m"
)

# Movable wealth per adult capita
all_data_movable_wealth_per_adult_capita_500 <- load_segregation_data(
  "Movable_wealth_per_adult_capita_segregation_500m",
  "Movable_wealth_per_adult_capita_500m"
)
all_data_movable_wealth_per_adult_capita_4000 <- load_segregation_data(
  "Movable_wealth_per_adult_capita_segregation_4000m",
  "Movable_wealth_per_adult_capita_4000m"
)


##### Final step #####
# Stack the per-variable tables into one long table; the Variable column tells
# the rows apart.
all_data <- bind_rows(list(
  all_data_wealth,
  all_data_income,
  all_data_wealth_per_capita,
  all_data_income_per_capita,
  all_data_wealth_4000,
  all_data_income_4000,
  all_data_wealth_4000_per_capita,
  all_data_income_4000_per_capita,
  all_data_income_adjusted_500,
  all_data_income_adjusted_4000,
  all_data_real_estate_500,
  all_data_real_estate_4000,
  all_data_movable_wealth_500,
  all_data_movable_wealth_4000,
  all_data_wealth_per_adult_capita_500,
  all_data_wealth_per_adult_capita_4000,
  all_data_income_per_adult_capita_500,
  all_data_income_per_adult_capita_4000,
  all_data_real_estate_per_adult_capita_500,
  all_data_real_estate_per_adult_capita_4000,
  all_data_movable_wealth_per_adult_capita_500,
  all_data_movable_wealth_per_adult_capita_4000
))

# The csv file holds every year
write.csv(all_data, "segregation_data_FUA_level.csv", row.names = FALSE)

# The xlsx file only holds the years after 2010, for two reasons: there is no
# consistent income data before 2011, and Excel does not allow more than
# 1 million rows.
all_data_excel <- filter(all_data, Year > 2010)
write_xlsx(all_data_excel, "segregation_data_FUA_level.xlsx")

#### Check similarities between per capita calculations and per adult capita calculations ####

# Pair every per capita table with its per adult capita counterpart. Rows are
# matched on group, year and FUA; the Value column of the per capita table
# comes back as Value.x and that of the per adult capita table as Value.y.
check_keys <- c("Group", "Year", "FUA")

checking_wealth <- merge(
  all_data_wealth_per_capita, all_data_wealth_per_adult_capita_500,
  by = check_keys
)
checking_income <- merge(
  all_data_income_per_capita, all_data_income_per_adult_capita_500,
  by = check_keys
)
checking_real_estate <- merge(
  all_data_real_estate_500, all_data_real_estate_per_adult_capita_500,
  by = check_keys
)
checking_movable_wealth <- merge(
  all_data_movable_wealth_500, all_data_movable_wealth_per_adult_capita_500,
  by = check_keys
)
checking_wealth_4000 <- merge(
  all_data_wealth_4000_per_capita, all_data_wealth_per_adult_capita_4000,
  by = check_keys
)
checking_income_4000 <- merge(
  all_data_income_4000_per_capita, all_data_income_per_adult_capita_4000,
  by = check_keys
)
checking_real_estate_4000 <- merge(
  all_data_real_estate_4000, all_data_real_estate_per_adult_capita_4000,
  by = check_keys
)
checking_movable_wealth_4000 <- merge(
  all_data_movable_wealth_4000, all_data_movable_wealth_per_adult_capita_4000,
  by = check_keys
)

# Calculate correlations. They are collected in a named vector and printed
# explicitly: bare cor() calls are only displayed when R auto-prints top-level
# results, so they would be lost when the script is run through source().
per_capita_vs_per_adult_cor <- c(
  wealth_500m = cor(checking_wealth$Value.x, checking_wealth$Value.y),
  wealth_4000m = cor(checking_wealth_4000$Value.x,
                     checking_wealth_4000$Value.y),
  income_500m = cor(checking_income$Value.x, checking_income$Value.y),
  income_4000m = cor(checking_income_4000$Value.x,
                     checking_income_4000$Value.y),
  real_estate_500m = cor(checking_real_estate$Value.x,
                         checking_real_estate$Value.y),
  real_estate_4000m = cor(checking_real_estate_4000$Value.x,
                          checking_real_estate_4000$Value.y),
  movable_wealth_500m = cor(checking_movable_wealth$Value.x,
                            checking_movable_wealth$Value.y),
  movable_wealth_4000m = cor(checking_movable_wealth_4000$Value.x,
                             checking_movable_wealth_4000$Value.y)
)
print(per_capita_vs_per_adult_cor)

#### Calculate the population of every FUA #### -----------------------------------

# Load the 2022 data, only if not already loaded
if (!exists("data2022")) {
  data2022 <- read.csv("raw_data/database_2022.csv")
}

# Calculate the number of rows and of people per FUA in 2022.
# na.rm = TRUE matches process_year(), which computes the same figures for
# every year.
FUA_population <- data2022 %>%
  group_by(FUA) %>%
  summarize(number_of_households = n(),
            number_of_people = sum(INHAHL, na.rm = TRUE))

# Load the rest of the data, again only if not already loaded. Each file ends
# up in an object called data<year>.
# NOTE(review): these objects are not referenced again in the visible part of
# this script (process_year() reads the files itself) - confirm whether the
# loads are still needed.
for (raw_year in 2021:2011) {
  raw_name <- paste0("data", raw_year)
  if (!exists(raw_name)) {
    assign(raw_name, read.csv(paste0("raw_data/database_", raw_year, ".csv")))
  }
}
rm(raw_year, raw_name)

# Years to process
years <- 2011:2022

# Summarise the raw database of one year per FUA.
#
# Arguments:
#   year  Year whose file "raw_data/database_<year>.csv" is read.
# Returns one row per FUA with the number of rows (number_of_households), the
# sum of INHAHL ignoring missing values (number_of_people) and the year.
process_year <- function(year) {
  raw_data <- read.csv(paste0("raw_data/database_", year, ".csv"))
  # Drop the raw table and run the garbage collector when the function exits
  on.exit({ rm(raw_data); gc()}, add = TRUE)

  per_fua <- summarize(
    group_by(raw_data, FUA),
    number_of_households = n(),
    number_of_people = sum(INHAHL, na.rm = TRUE)
  )

  mutate(per_fua, year = year)
}

# Run process_year() for every year and stack the yearly summaries
FUA_population_all <- bind_rows(lapply(years, process_year))

# Export the result, first as xlsx and then as csv
write_xlsx(FUA_population_all, "FUA_population_data_all_years.xlsx")
write.csv(FUA_population_all, "FUA_population_data_all_years.csv", row.names = FALSE)

# Save as a csv file info for 2022
#write.csv(FUA_population, file = "FUA_population_data.csv", row.names = FALSE)
# Save as a xlsx file info for 2022
#write_xlsx(FUA_population, path = "FUA_population_data.xlsx")

#### Add population per FUA and per FUA-percentile - required by CBS inspectors ####

# Read back the workbook written earlier in this script
segregation_data_FUA_level <- read_excel("segregation_data_FUA_level.xlsx")

# Give the year column the same name as in the segregation table
FUA_population_all <- rename(FUA_population_all, Year = year)

# Attach the population figures, keeping every segregation row even when no
# population row matches
segregation_data_FUA_level <- merge(
  segregation_data_FUA_level,
  FUA_population_all,
  by = c("FUA", "Year"),
  all.x = TRUE,
  all.y = FALSE
)

# Calculate the number of observations per percentile
segregation_data_FUA_level <- mutate(
  segregation_data_FUA_level,
  Number_of_observations = floor(number_of_households / 100)
)

# Delete superfluous information
segregation_data_FUA_level <- select(
  segregation_data_FUA_level,
  -number_of_households,
  -number_of_people
)

# Overwrite the workbook with the extended table
write_xlsx(segregation_data_FUA_level, "segregation_data_FUA_level.xlsx")

# Write one table to "<file_stem>.csv" (without row names) and to
# "<file_stem>.xlsx", both in the working directory.
#
# Arguments:
#   data       Table to export.
#   file_stem  File name without extension, shared by both files.
# Returns `data` invisibly.
export_csv_and_xlsx <- function(data, file_stem) {
  write.csv(data, file = paste0(file_stem, ".csv"), row.names = FALSE)
  write_xlsx(data, path = paste0(file_stem, ".xlsx"))
  invisible(data)
}

# NOTE(review): none of the tables exported below is created in the visible
# part of this script; presumably they are left in the workspace by earlier
# scripts - confirm before running this section on its own.

#### Save inequality data as well ####
export_csv_and_xlsx(Gini_national, "Inequality_data")
export_csv_and_xlsx(distribution, "Distribution_data")

#### Export data for the map #### -----------------------------------
export_csv_and_xlsx(grid_cell_data_map, "grid_cell_data_map")
export_csv_and_xlsx(grid_cell_data_map_clean, "grid_cell_data_map_clean")
export_csv_and_xlsx(grid_cell_data_map_100m, "grid_cell_data_map_100m")
export_csv_and_xlsx(grid_cell_data_map_clean_100m,
                    "grid_cell_data_map_clean_100m")

#### Export summary data per grid cell #### ------------------------------------------------------------------------------
export_csv_and_xlsx(grid_cell_summary_100m, "grid_cell_summary_100m")
export_csv_and_xlsx(grid_cell_summary_500m, "grid_cell_summary_500m")

#### Export percentile thresholds #### ------------------------------------------------------------------------------
export_csv_and_xlsx(percentile_thresholds, "percentile_thresholds")

#### Export shares of different kinds of wealth #### ---------------------------------------------------------
export_csv_and_xlsx(share_type_wealth_percentile,
                    "share_type_wealth_percentile")

