##### Linking household wealth and income data with information about the household's residential address and housing tenure #####
# 01_load_and_process_data script
# 14/06/2024
# We create a full-information database of households in the Netherlands, with data about:
# Income (INHATAB or IHI)
# Wealth (VEHTAB)
# Location (VSLVIERKANTTAB + GBAADRESOBJECTBUS + VSLGWBTAB)
# Housing tenure (EIGENDOM or EIGENDOMWOZTAB)

#### Preparation ##### ----------------------------------------------------------

# load the required packages
library(haven)
library(ineq)
library(DescTools)
library(tidyverse)
library(OasisR)
library(seg)
library(sf)
library(sp)

# Print numbers in fixed rather than scientific notation
options(scipen = 9999999)

# General CBS tables that are reused for every year
grid <- read_sav("G:/BouwenWonen/VSLVIERKANTTAB/VRLVSLVIERKANTTABV2024031.sav")

# Address table: add an "ID" built from RINPERSOONS and RINPERSOON (the same
# way the yearly tables build theirs) and give the start/end date columns of
# each address spell shorter, more readable names
address <- read_sav("G:/Bevolking/GBAADRESOBJECTBUS/GBAADRESOBJECT2023BUSV1.sav") %>%
  mutate(ID = paste(RINPERSOONS, RINPERSOON)) %>%
  rename(
    move_in = GBADATUMAANVANGADRESHOUDING,
    move_out = GBADATUMEINDEADRESHOUDING
  )

municipality <- read_sav("G:/BouwenWonen/VSLGWBTAB/VSLGWB2024TAB03V1.sav")

# Functional Urban Area (FUA) of every municipality, from a semicolon-separated
# csv file. The municipality code is zero-padded to four characters so that it
# is consistent with the municipality code used when merging with the income
# data.
FUA <- read.csv("raw_data/Municipalities and FUA_V5.csv", sep = ";") %>%
  mutate(Code = sprintf("%04d", Code))

#### Calculations per year (2022 - 2017) #### ---------------------------------

# The processing steps are identical for every year; only the reference year
# and the versioned CBS file names differ. The steps are therefore written once
# in build_household_database() and applied to every row of `year_sources`,
# instead of being copy-pasted per year. To process an additional year in the
# same way, add a row to `year_sources`.

# Versioned source files per year
year_sources <- data.frame(
  year = c(2022L, 2021L, 2020L, 2019L, 2018L, 2017L),
  income_path = c(
    "G:/InkomenBestedingen/INHATAB/INHA2022TABV1.sav",
    "G:/InkomenBestedingen/INHATAB/INHA2021TABV2.sav",
    "G:/InkomenBestedingen/INHATAB/INHA2020TABV2.sav",
    "G:/InkomenBestedingen/INHATAB/INHA2019TABV2.sav",
    "G:/InkomenBestedingen/INHATAB/INHA2018TABV2.sav",
    "G:/InkomenBestedingen/INHATAB/INHA2017TABV2.sav"
  ),
  wealth_path = c(
    "G:/InkomenBestedingen/VEHTAB/VEH2022TABV1.sav",
    "G:/InkomenBestedingen/VEHTAB/VEH2021TABV3.sav",
    "G:/InkomenBestedingen/VEHTAB/VEH2020TABV3.sav",
    "G:/InkomenBestedingen/VEHTAB/VEH2019TABV4.sav",
    "G:/InkomenBestedingen/VEHTAB/VEH2018TABV3.sav",
    "G:/InkomenBestedingen/VEHTAB/VEH2017TABV3.sav"
  ),
  tenure_path = c(
    "G:/BouwenWonen/EIGENDOMTAB/EIGENDOM2022TABV2.sav",
    "G:/BouwenWonen/EIGENDOMTAB/EIGENDOM2021TABV2.sav",
    "G:/BouwenWonen/EIGENDOMTAB/EIGENDOM2020TABV3.sav",
    "G:/BouwenWonen/EIGENDOMTAB/EIGENDOM2019TABV3.sav",
    "G:/BouwenWonen/EIGENDOMTAB/EIGENDOM2018TABV3.sav",
    "G:/BouwenWonen/EIGENDOMTAB/EIGENDOM2017TABV5.sav"
  ),
  stringsAsFactors = FALSE
)

# Build and save the linked household database for one year.
#
# Steps: load the income table, attach the address that is valid around
# 1 January of `year`, attach grid cells, municipality and FUA, attach wealth
# and housing-tenure data, derive the tenure indicators, keep the relevant
# columns and write them to a csv file.
#
# Arguments:
#   year          Reference year (single whole number), e.g. 2022L. It also
#                 determines the municipality column ("gem<year>") used to
#                 merge with the FUA table.
#   income_path   Path to the INHATAB .sav file of that year.
#   wealth_path   Path to the VEHTAB .sav file of that year.
#   tenure_path   Path to the EIGENDOMTAB .sav file of that year.
#   address       Address table with columns ID, move_in, move_out (dates
#                 stored as "%Y%m%d" values) and RINOBJECTNUMMER.
#   grid          Grid-cell table, keyed by RINOBJECTNUMMER.
#   municipality  Municipality table, keyed by RINOBJECTNUMMER.
#   fua           FUA lookup table with the municipality code in column "Code".
#   out_path      Where to write the csv file
#                 (default: "database_<year>.csv" in the working directory).
#
# Returns `out_path`, invisibly. Called for its side effect of writing the csv.
build_household_database <- function(year, income_path, wealth_path, tenure_path,
                                     address, grid, municipality, fua,
                                     out_path = sprintf("database_%d.csv", year)) {
  stopifnot(length(year) == 1L, !is.na(year))

  reference_date <- as.Date(sprintf("%d-01-01", year))
  day_after <- reference_date + 1

  # Income table, with an "ID" built from RINPERSOONSHKW and RINPERSOONHKW
  income <- read_sav(income_path) %>%
    mutate(ID = paste(RINPERSOONSHKW, RINPERSOONHKW))

  # Address spells that started before 2 January and ended after 1 January of
  # the reference year.
  # NOTE(review): the lower bound includes 1 January but the upper bound
  # excludes it. If move_out is the last day *at* the address, spells ending
  # exactly on 1 January are dropped - confirm against the CBS documentation.
  address_year <- address %>%
    mutate(move_in = as.Date(as.character(move_in), format = "%Y%m%d"),
           move_out = as.Date(as.character(move_out), format = "%Y%m%d")) %>%
    filter(move_in < day_after & move_out > reference_date)

  # Add RINOBJECTNUMMER (left join: keep every income record)
  income <- merge(income, address_year, by = "ID", all.x = TRUE, all.y = FALSE)
  rm(address_year) # no longer needed; free the memory before the next merges

  # Add grid cells
  income <- merge(income, grid, by = "RINOBJECTNUMMER", all.x = TRUE, all.y = FALSE)

  # Add municipality
  income <- merge(income, municipality, by = "RINOBJECTNUMMER", all.x = TRUE, all.y = FALSE)

  # Add the Functional Urban Area, matched on the municipality code of `year`
  income <- merge(income, fua, by.x = paste0("gem", year), by.y = "Code",
                  all.x = TRUE, all.y = FALSE)

  # Add wealth data (VEHTAB), using the same ID definition as the income table
  wealth <- read_sav(wealth_path) %>%
    mutate(ID = paste(RINPERSOONSHKW, RINPERSOONHKW))
  income <- merge(income, wealth, by = "ID", all.x = TRUE, all.y = FALSE)
  rm(wealth)

  # Add property data (EIGENDOMTAB)
  tenure <- read_sav(tenure_path)
  income <- merge(income, tenure, by = "RINOBJECTNUMMER", all.x = TRUE, all.y = FALSE)
  rm(tenure)

  # Drop records without a tenure type and records with tenure type "O"
  income <- income[complete.cases(income$TypeEigendom), ]
  income <- subset(income, TypeEigendom != "O")

  # Tenure indicators (1 = yes, 0 = no). Mortgage can be NA for a homeowner
  # whose mortgage debt is missing, e.g. because no wealth record matched.
  income <- income %>%
    mutate(
      Ownership = if_else(TypeEigendom == "E", 1, 0),
      Mortgage = if_else(VEHW1210SHYH > 0 & TypeEigendom == "E", 1, 0),
      Social_housing = if_else(TypeEigenaar == "C", 1, 0),
      Non_social_rental_housing = if_else(TypeEigenaar == "V", 1, 0)
    )

  # Keep what looks relevant (for the moment)
  database <- select(
    income,
    ID, # ID
    FUA, # FUA
    Municipality, # Municipality
    VRLVIERKANT100M, # 100 m x 100 m grid cell
    VRLVIERKANT500M, # 500 m x 500 m grid cell
    INHAHL, # number of people in the household
    INHBBIHJ, # main source of household income (it is repeated in the metadata?)
    INHBELIH, # taxable household income
    INHBESTINKH, # net income
    INHBRUTINKH, # gross income
    INHEHALGR, # home ownership and whether the household receives a rent allowance
    INHGESTINKH, # net income adjusted for household size and composition
    INHPRIMINKH, # primary income (income from work, assets and own business)
    INHSAMHH, # information about the household composition
    VEHW1000VERH, # household total assets
    VEHW1100BEZH, # household possessions
    VEHW1110FINH, # financial assets
    VEHW1111BANH, # savings and deposits
    VEHW1112EFFH, # bonds and shares
    VEHW1120ONRH, # value of real estate
    VEHW1121WONH, # value of the own owned home
    VEHW1130ONDH, # value of the entrepreneurial capital
    VEHW1140ABEH, # value of significant interests of a household (more than 5% of a company)
    VEHW1150OVEH, # other household assets (e.g. cash)
    VEHW1200STOH, # household debts
    VEHW1210SHYH, # mortgage debt
    VEHW1220SSTH, # student debt
    VEHW1230SOVH, # other debts
    Ownership, # ownership of the house where the household lives
    Mortgage, # having a mortgage
    Social_housing, # living in social housing
    Non_social_rental_housing # living in private rental housing
  )

  # Save the data
  write.csv(database, out_path, row.names = FALSE)

  invisible(out_path)
}

# Process the years one at a time. All per-year data live inside the function
# call, so they are released from memory when each call returns.
for (i in seq_len(nrow(year_sources))) {
  build_household_database(
    year = year_sources$year[i],
    income_path = year_sources$income_path[i],
    wealth_path = year_sources$wealth_path[i],
    tenure_path = year_sources$tenure_path[i],
    address = address,
    grid = grid,
    municipality = municipality,
    fua = FUA
  )
}




#### Calculations for the year 2016 #### ----------------------------------------

# Read the INHATAB income file for 2016
income2016 <- read_sav("G:/InkomenBestedingen/INHATAB/INHA2016TABV2.sav")

# Build the linking key "ID" by pasting RINPERSOONSHKW and RINPERSOONHKW
# (paste() separates them with a single space)
income2016 <- income2016 %>%
  mutate(ID = paste(RINPERSOONSHKW, RINPERSOONHKW))

# Keep the address records in force on 1 January 2016: move_in before
# 2 January and move_out after 1 January
# NOTE(review): a record whose move_out is exactly 2016-01-01 is dropped by the
# strict ">" below; confirm whether the end date is inclusive in the source
address_2016 <- address %>%
  mutate(move_in = as.Date(as.character(move_in), format = "%Y%m%d")) %>%
  mutate(move_out = as.Date(as.character(move_out), format = "%Y%m%d")) %>%
  filter(
    move_in < as.Date("2016-01-02"),
    move_out > as.Date("2016-01-01")
  )

# Left-merge the 2016 address records on ID (brings in RINOBJECTNUMMER)
income2016 <- merge(
  income2016, address_2016,
  by = "ID", all.x = TRUE
)

# Left-merge the grid table on RINOBJECTNUMMER
income2016 <- merge(
  income2016, grid,
  by = "RINOBJECTNUMMER", all.x = TRUE
)

# Left-merge the municipality table on RINOBJECTNUMMER
income2016 <- merge(
  income2016, municipality,
  by = "RINOBJECTNUMMER", all.x = TRUE
)

# Zero-padding of gem2016 is left disabled; the code is matched as read
#income2016$gem2016 <- sprintf("%04d", income2016$gem2016)

# Left-merge the Functional Urban Area table (gem2016 against FUA$Code)
income2016 <- merge(
  income2016, FUA,
  by.x = "gem2016", by.y = "Code", all.x = TRUE
)

# Read the VEHTAB wealth file for 2016 and give it the same "ID" key
wealth2016 <- read_sav("G:/InkomenBestedingen/VEHTAB/VEH2016TABV3.sav") %>%
  mutate(ID = paste(RINPERSOONSHKW, RINPERSOONHKW))

# Left-merge the wealth data on ID
income2016 <- merge(
  income2016, wealth2016,
  by = "ID", all.x = TRUE
)

# Read the EIGENDOM property file for 2016
EIGENDOM2016 <- read_sav("G:/BouwenWonen/EIGENDOMTAB/EIGENDOM2016TABV6.sav")

# Left-merge the property data on RINOBJECTNUMMER
income2016 <- merge(
  income2016, EIGENDOM2016,
  by = "RINOBJECTNUMMER", all.x = TRUE
)

# Keep only rows with a non-missing TypeEigendom that is not "O"
income2016 <- subset(
  income2016,
  complete.cases(TypeEigendom) & TypeEigendom != "O"
)

# Derive the four 0/1 housing-tenure indicators in a single step
income2016 <- income2016 %>%
  mutate(
    # homeowner: TypeEigendom is "E"
    Ownership = if_else(TypeEigendom == "E", 1, 0),
    # homeowner with a positive mortgage debt (VEHW1210SHYH)
    Mortgage = if_else(VEHW1210SHYH > 0 & TypeEigendom == "E", 1, 0),
    # renter of social housing: TypeEigenaar is "C"
    Social_housing = if_else(TypeEigenaar == "C", 1, 0),
    # renter of non-social housing: TypeEigenaar is "V"
    Non_social_rental_housing = if_else(TypeEigenaar == "V", 1, 0)
  )

# Assemble the 2016 table from the variables that look relevant for now
database_2016 <- income2016 %>%
  select(
    ID, # household ID
    FUA, # Functional Urban Area
    Municipality, # municipality
    VRLVIERKANT100M, # grid cell of 100 m x 100 m
    VRLVIERKANT500M, # grid cell of 500 m x 500 m
    INHAHL, # number of people in the household
    INHBBIHJ, # main source of household income (repeated in the metadata?)
    INHBELIH, # taxable household income
    INHBESTINKH, # net income
    INHBRUTINKH, # gross income
    INHEHALGR, # home ownership and receipt of a rent allowance
    INHGESTINKH, # net income adjusted for household size and composition
    INHPRIMINKH, # primary income (work, assets and own business)
    INHSAMHH, # household composition
    VEHW1000VERH, # total household assets
    VEHW1100BEZH, # household possessions
    VEHW1110FINH, # financial assets
    VEHW1111BANH, # savings and deposits
    VEHW1112EFFH, # bonds and shares
    VEHW1120ONRH, # value of real estate
    VEHW1121WONH, # value of the household's own home
    VEHW1130ONDH, # value of the entrepreneurial capital
    VEHW1140ABEH, # significant interests (more than 5% of a company)
    VEHW1150OVEH, # other household assets (e.g. cash)
    VEHW1200STOH, # household debts
    VEHW1210SHYH, # mortgage debt
    VEHW1220SSTH, # student debt
    VEHW1230SOVH, # other debts
    Ownership, # owns the house where the household lives
    Mortgage, # has a mortgage
    Social_housing, # lives in social housing
    Non_social_rental_housing # lives in private rental housing
  )



# Write the 2016 table to database_2016.csv, without a row-name column
write.csv(database_2016, file = "database_2016.csv", row.names = FALSE)

# Remove all 2016 objects from the workspace to free memory for what follows
rm(income2016, wealth2016, EIGENDOM2016, address_2016, database_2016)



#### Calculations for the year 2015 #### ----------------------------------------

# Read the INHATAB income file for 2015
income2015 <- read_sav("G:/InkomenBestedingen/INHATAB/INHA2015TABV1.sav")

# Build the linking key "ID" by pasting RINPERSOONSHKW and RINPERSOONHKW
# (paste() separates them with a single space)
income2015 <- income2015 %>%
  mutate(ID = paste(RINPERSOONSHKW, RINPERSOONHKW))

# Keep the address records in force on 1 January 2015: move_in before
# 2 January and move_out after 1 January
# NOTE(review): a record whose move_out is exactly 2015-01-01 is dropped by the
# strict ">" below; confirm whether the end date is inclusive in the source
address_2015 <- address %>%
  mutate(move_in = as.Date(as.character(move_in), format = "%Y%m%d")) %>%
  mutate(move_out = as.Date(as.character(move_out), format = "%Y%m%d")) %>%
  filter(
    move_in < as.Date("2015-01-02"),
    move_out > as.Date("2015-01-01")
  )

# Left-merge the 2015 address records on ID (brings in RINOBJECTNUMMER)
income2015 <- merge(
  income2015, address_2015,
  by = "ID", all.x = TRUE
)

# Left-merge the grid table on RINOBJECTNUMMER
income2015 <- merge(
  income2015, grid,
  by = "RINOBJECTNUMMER", all.x = TRUE
)

# Left-merge the municipality table on RINOBJECTNUMMER
income2015 <- merge(
  income2015, municipality,
  by = "RINOBJECTNUMMER", all.x = TRUE
)

# Zero-padding of gem2015 is left disabled; the code is matched as read
#income2015$gem2015 <- sprintf("%04d", income2015$gem2015)

# Left-merge the Functional Urban Area table (gem2015 against FUA$Code)
income2015 <- merge(
  income2015, FUA,
  by.x = "gem2015", by.y = "Code", all.x = TRUE
)

# Read the VEHTAB wealth file for 2015 and give it the same "ID" key
wealth2015 <- read_sav("G:/InkomenBestedingen/VEHTAB/VEH2015TABV2.sav") %>%
  mutate(ID = paste(RINPERSOONSHKW, RINPERSOONHKW))

# Left-merge the wealth data on ID
income2015 <- merge(
  income2015, wealth2015,
  by = "ID", all.x = TRUE
)

# Read the EIGENDOM property file for 2015
EIGENDOM2015 <- read_sav("G:/BouwenWonen/EIGENDOMTAB/EIGENDOM2015TABV6.sav")

# Left-merge the property data on RINOBJECTNUMMER
income2015 <- merge(
  income2015, EIGENDOM2015,
  by = "RINOBJECTNUMMER", all.x = TRUE
)

# Keep only rows with a non-missing TypeEigendom that is not "O"
income2015 <- subset(
  income2015,
  complete.cases(TypeEigendom) & TypeEigendom != "O"
)

# Derive the four 0/1 housing-tenure indicators in a single step
income2015 <- income2015 %>%
  mutate(
    # homeowner: TypeEigendom is "E"
    Ownership = if_else(TypeEigendom == "E", 1, 0),
    # homeowner with a positive mortgage debt (VEHW1210SHYH)
    Mortgage = if_else(VEHW1210SHYH > 0 & TypeEigendom == "E", 1, 0),
    # renter of social housing: TypeEigenaar is "C"
    Social_housing = if_else(TypeEigenaar == "C", 1, 0),
    # renter of non-social housing: TypeEigenaar is "V"
    Non_social_rental_housing = if_else(TypeEigenaar == "V", 1, 0)
  )

# Assemble the 2015 table from the variables that look relevant for now
database_2015 <- income2015 %>%
  select(
    ID, # household ID
    FUA, # Functional Urban Area
    Municipality, # municipality
    VRLVIERKANT100M, # grid cell of 100 m x 100 m
    VRLVIERKANT500M, # grid cell of 500 m x 500 m
    INHAHL, # number of people in the household
    INHBBIHJ, # main source of household income (repeated in the metadata?)
    INHBELIH, # taxable household income
    INHBESTINKH, # net income
    INHBRUTINKH, # gross income
    INHEHALGR, # home ownership and receipt of a rent allowance
    INHGESTINKH, # net income adjusted for household size and composition
    INHPRIMINKH, # primary income (work, assets and own business)
    INHSAMHH, # household composition
    VEHW1000VERH, # total household assets
    VEHW1100BEZH, # household possessions
    VEHW1110FINH, # financial assets
    VEHW1111BANH, # savings and deposits
    VEHW1112EFFH, # bonds and shares
    VEHW1120ONRH, # value of real estate
    VEHW1121WONH, # value of the household's own home
    VEHW1130ONDH, # value of the entrepreneurial capital
    VEHW1140ABEH, # significant interests (more than 5% of a company)
    VEHW1150OVEH, # other household assets (e.g. cash)
    VEHW1200STOH, # household debts
    VEHW1210SHYH, # mortgage debt
    VEHW1220SSTH, # student debt
    VEHW1230SOVH, # other debts
    Ownership, # owns the house where the household lives
    Mortgage, # has a mortgage
    Social_housing, # lives in social housing
    Non_social_rental_housing # lives in private rental housing
  )


# Write the 2015 table to database_2015.csv, without a row-name column
write.csv(database_2015, file = "database_2015.csv", row.names = FALSE)

# Remove all 2015 objects from the workspace to free memory for what follows
rm(income2015, wealth2015, EIGENDOM2015, address_2015, database_2015)




#### Calculations for the year 2014 #### ----------------------------------------

# Read the INHATAB income file for 2014
income2014 <- read_sav("G:/InkomenBestedingen/INHATAB/INHA2014TABV1.sav")

# Build the linking key "ID" by pasting RINPERSOONSHKW and RINPERSOONHKW
# (paste() separates them with a single space)
income2014 <- income2014 %>%
  mutate(ID = paste(RINPERSOONSHKW, RINPERSOONHKW))

# Keep the address records in force on 1 January 2014: move_in before
# 2 January and move_out after 1 January
# NOTE(review): a record whose move_out is exactly 2014-01-01 is dropped by the
# strict ">" below; confirm whether the end date is inclusive in the source
address_2014 <- address %>%
  mutate(move_in = as.Date(as.character(move_in), format = "%Y%m%d")) %>%
  mutate(move_out = as.Date(as.character(move_out), format = "%Y%m%d")) %>%
  filter(
    move_in < as.Date("2014-01-02"),
    move_out > as.Date("2014-01-01")
  )

# Left-merge the 2014 address records on ID (brings in RINOBJECTNUMMER)
income2014 <- merge(
  income2014, address_2014,
  by = "ID", all.x = TRUE
)

# Left-merge the grid table on RINOBJECTNUMMER
income2014 <- merge(
  income2014, grid,
  by = "RINOBJECTNUMMER", all.x = TRUE
)

# Left-merge the municipality table on RINOBJECTNUMMER
income2014 <- merge(
  income2014, municipality,
  by = "RINOBJECTNUMMER", all.x = TRUE
)

# Zero-padding of gem2014 is left disabled; the code is matched as read
#income2014$gem2014 <- sprintf("%04d", income2014$gem2014)

# Left-merge the Functional Urban Area table (gem2014 against FUA$Code)
income2014 <- merge(
  income2014, FUA,
  by.x = "gem2014", by.y = "Code", all.x = TRUE
)

# Read the VEHTAB wealth file for 2014 and give it the same "ID" key
wealth2014 <- read_sav("G:/InkomenBestedingen/VEHTAB/VEH2014TABV2.sav") %>%
  mutate(ID = paste(RINPERSOONSHKW, RINPERSOONHKW))

# Left-merge the wealth data on ID
income2014 <- merge(
  income2014, wealth2014,
  by = "ID", all.x = TRUE
)

# Read the EIGENDOM property file for 2014
EIGENDOM2014 <- read_sav("G:/BouwenWonen/EIGENDOMTAB/EIGENDOM2014TABV6.sav")

# Left-merge the property data on RINOBJECTNUMMER
income2014 <- merge(
  income2014, EIGENDOM2014,
  by = "RINOBJECTNUMMER", all.x = TRUE
)

# Keep only rows with a non-missing TypeEigendom that is not "O"
income2014 <- subset(
  income2014,
  complete.cases(TypeEigendom) & TypeEigendom != "O"
)

# Derive the four 0/1 housing-tenure indicators in a single step
income2014 <- income2014 %>%
  mutate(
    # homeowner: TypeEigendom is "E"
    Ownership = if_else(TypeEigendom == "E", 1, 0),
    # homeowner with a positive mortgage debt (VEHW1210SHYH)
    Mortgage = if_else(VEHW1210SHYH > 0 & TypeEigendom == "E", 1, 0),
    # renter of social housing: TypeEigenaar is "C"
    Social_housing = if_else(TypeEigenaar == "C", 1, 0),
    # renter of non-social housing: TypeEigenaar is "V"
    Non_social_rental_housing = if_else(TypeEigenaar == "V", 1, 0)
  )

# Assemble the 2014 table from the variables that look relevant for now
database_2014 <- income2014 %>%
  select(
    ID, # household ID
    FUA, # Functional Urban Area
    Municipality, # municipality
    VRLVIERKANT100M, # grid cell of 100 m x 100 m
    VRLVIERKANT500M, # grid cell of 500 m x 500 m
    INHAHL, # number of people in the household
    INHBBIHJ, # main source of household income (repeated in the metadata?)
    INHBELIH, # taxable household income
    INHBESTINKH, # net income
    INHBRUTINKH, # gross income
    INHEHALGR, # home ownership and receipt of a rent allowance
    INHGESTINKH, # net income adjusted for household size and composition
    INHPRIMINKH, # primary income (work, assets and own business)
    INHSAMHH, # household composition
    VEHW1000VERH, # total household assets
    VEHW1100BEZH, # household possessions
    VEHW1110FINH, # financial assets
    VEHW1111BANH, # savings and deposits
    VEHW1112EFFH, # bonds and shares
    VEHW1120ONRH, # value of real estate
    VEHW1121WONH, # value of the household's own home
    VEHW1130ONDH, # value of the entrepreneurial capital
    VEHW1140ABEH, # significant interests (more than 5% of a company)
    VEHW1150OVEH, # other household assets (e.g. cash)
    VEHW1200STOH, # household debts
    VEHW1210SHYH, # mortgage debt
    VEHW1220SSTH, # student debt
    VEHW1230SOVH, # other debts
    Ownership, # owns the house where the household lives
    Mortgage, # has a mortgage
    Social_housing, # lives in social housing
    Non_social_rental_housing # lives in private rental housing
  )


# Write the 2014 table to database_2014.csv, without a row-name column
write.csv(database_2014, file = "database_2014.csv", row.names = FALSE)

# Remove all 2014 objects from the workspace to free memory for what follows
rm(income2014, wealth2014, EIGENDOM2014, address_2014, database_2014)




#### Calculations for the year 2013 #### ----------------------------------------

# Read the INHATAB income file for 2013
income2013 <- read_sav("G:/InkomenBestedingen/INHATAB/INHA2013TABV2.sav")

# Build the linking key "ID" by pasting RINPERSOONSHKW and RINPERSOONHKW
# (paste() separates them with a single space)
income2013 <- income2013 %>%
  mutate(ID = paste(RINPERSOONSHKW, RINPERSOONHKW))

# Keep the address records in force on 1 January 2013: move_in before
# 2 January and move_out after 1 January
# NOTE(review): a record whose move_out is exactly 2013-01-01 is dropped by the
# strict ">" below; confirm whether the end date is inclusive in the source
address_2013 <- address %>%
  mutate(move_in = as.Date(as.character(move_in), format = "%Y%m%d")) %>%
  mutate(move_out = as.Date(as.character(move_out), format = "%Y%m%d")) %>%
  filter(
    move_in < as.Date("2013-01-02"),
    move_out > as.Date("2013-01-01")
  )

# Left-merge the 2013 address records on ID (brings in RINOBJECTNUMMER)
income2013 <- merge(
  income2013, address_2013,
  by = "ID", all.x = TRUE
)

# Left-merge the grid table on RINOBJECTNUMMER
income2013 <- merge(
  income2013, grid,
  by = "RINOBJECTNUMMER", all.x = TRUE
)

# Left-merge the municipality table on RINOBJECTNUMMER
income2013 <- merge(
  income2013, municipality,
  by = "RINOBJECTNUMMER", all.x = TRUE
)

# Zero-padding of gem2013 is left disabled; the code is matched as read
#income2013$gem2013 <- sprintf("%04d", income2013$gem2013)

# Left-merge the Functional Urban Area table (gem2013 against FUA$Code)
income2013 <- merge(
  income2013, FUA,
  by.x = "gem2013", by.y = "Code", all.x = TRUE
)

# Read the VEHTAB wealth file for 2013 and give it the same "ID" key
wealth2013 <- read_sav("G:/InkomenBestedingen/VEHTAB/VEH2013TABV2.sav") %>%
  mutate(ID = paste(RINPERSOONSHKW, RINPERSOONHKW))

# Left-merge the wealth data on ID
income2013 <- merge(
  income2013, wealth2013,
  by = "ID", all.x = TRUE
)

# Read the EIGENDOM property file for 2013
EIGENDOM2013 <- read_sav("G:/BouwenWonen/EIGENDOMTAB/EIGENDOM2013TABV6.sav")

# Left-merge the property data on RINOBJECTNUMMER
income2013 <- merge(
  income2013, EIGENDOM2013,
  by = "RINOBJECTNUMMER", all.x = TRUE
)

# Keep only rows with a non-missing TypeEigendom that is not "O"
income2013 <- subset(
  income2013,
  complete.cases(TypeEigendom) & TypeEigendom != "O"
)

# Derive the four 0/1 housing-tenure indicators in a single step
income2013 <- income2013 %>%
  mutate(
    # homeowner: TypeEigendom is "E"
    Ownership = if_else(TypeEigendom == "E", 1, 0),
    # homeowner with a positive mortgage debt (VEHW1210SHYH)
    Mortgage = if_else(VEHW1210SHYH > 0 & TypeEigendom == "E", 1, 0),
    # renter of social housing: TypeEigenaar is "C"
    Social_housing = if_else(TypeEigenaar == "C", 1, 0),
    # renter of non-social housing: TypeEigenaar is "V"
    Non_social_rental_housing = if_else(TypeEigenaar == "V", 1, 0)
  )

# Assemble the 2013 table from the variables that look relevant for now
database_2013 <- income2013 %>%
  select(
    ID, # household ID
    FUA, # Functional Urban Area
    Municipality, # municipality
    VRLVIERKANT100M, # grid cell of 100 m x 100 m
    VRLVIERKANT500M, # grid cell of 500 m x 500 m
    INHAHL, # number of people in the household
    INHBBIHJ, # main source of household income (repeated in the metadata?)
    INHBELIH, # taxable household income
    INHBESTINKH, # net income
    INHBRUTINKH, # gross income
    INHEHALGR, # home ownership and receipt of a rent allowance
    INHGESTINKH, # net income adjusted for household size and composition
    INHPRIMINKH, # primary income (work, assets and own business)
    INHSAMHH, # household composition
    VEHW1000VERH, # total household assets
    VEHW1100BEZH, # household possessions
    VEHW1110FINH, # financial assets
    VEHW1111BANH, # savings and deposits
    VEHW1112EFFH, # bonds and shares
    VEHW1120ONRH, # value of real estate
    VEHW1121WONH, # value of the household's own home
    VEHW1130ONDH, # value of the entrepreneurial capital
    VEHW1140ABEH, # significant interests (more than 5% of a company)
    VEHW1150OVEH, # other household assets (e.g. cash)
    VEHW1200STOH, # household debts
    VEHW1210SHYH, # mortgage debt
    VEHW1220SSTH, # student debt
    VEHW1230SOVH, # other debts
    Ownership, # owns the house where the household lives
    Mortgage, # has a mortgage
    Social_housing, # lives in social housing
    Non_social_rental_housing # lives in private rental housing
  )


# Write the 2013 table to database_2013.csv, without a row-name column
write.csv(database_2013, file = "database_2013.csv", row.names = FALSE)

# Remove all 2013 objects from the workspace to free memory for what follows
rm(income2013, wealth2013, EIGENDOM2013, address_2013, database_2013)




#### Calculations for the year 2012 #### ----------------------------------------

# Read the INHATAB income file for 2012
income2012 <- read_sav("G:/InkomenBestedingen/INHATAB/INHA2012TABV2.sav")

# Build the linking key "ID" by pasting RINPERSOONSHKW and RINPERSOONHKW
# (paste() separates them with a single space)
income2012 <- income2012 %>%
  mutate(ID = paste(RINPERSOONSHKW, RINPERSOONHKW))

# Keep the address records in force on 1 January 2012: move_in before
# 2 January and move_out after 1 January
# NOTE(review): a record whose move_out is exactly 2012-01-01 is dropped by the
# strict ">" below; confirm whether the end date is inclusive in the source
address_2012 <- address %>%
  mutate(move_in = as.Date(as.character(move_in), format = "%Y%m%d")) %>%
  mutate(move_out = as.Date(as.character(move_out), format = "%Y%m%d")) %>%
  filter(
    move_in < as.Date("2012-01-02"),
    move_out > as.Date("2012-01-01")
  )

# Left-merge the 2012 address records on ID (brings in RINOBJECTNUMMER)
income2012 <- merge(
  income2012, address_2012,
  by = "ID", all.x = TRUE
)

# Left-merge the grid table on RINOBJECTNUMMER
income2012 <- merge(
  income2012, grid,
  by = "RINOBJECTNUMMER", all.x = TRUE
)

# Left-merge the municipality table on RINOBJECTNUMMER
income2012 <- merge(
  income2012, municipality,
  by = "RINOBJECTNUMMER", all.x = TRUE
)

# Zero-padding of gem2012 is left disabled; the code is matched as read
#income2012$gem2012 <- sprintf("%04d", income2012$gem2012)

# Left-merge the Functional Urban Area table (gem2012 against FUA$Code)
income2012 <- merge(
  income2012, FUA,
  by.x = "gem2012", by.y = "Code", all.x = TRUE
)

# Read the VEHTAB wealth file for 2012 and give it the same "ID" key
wealth2012 <- read_sav("G:/InkomenBestedingen/VEHTAB/VEH2012TABV2.sav") %>%
  mutate(ID = paste(RINPERSOONSHKW, RINPERSOONHKW))

# Left-merge the wealth data on ID
income2012 <- merge(
  income2012, wealth2012,
  by = "ID", all.x = TRUE
)

# Read the EIGENDOM property file for 2012
EIGENDOM2012 <- read_sav("G:/BouwenWonen/EIGENDOMTAB/EIGENDOM2012TABV6.sav")

# Left-merge the property data on RINOBJECTNUMMER
income2012 <- merge(
  income2012, EIGENDOM2012,
  by = "RINOBJECTNUMMER", all.x = TRUE
)

# Keep only rows with a non-missing TypeEigendom that is not "O"
income2012 <- subset(
  income2012,
  complete.cases(TypeEigendom) & TypeEigendom != "O"
)

# Derive the four 0/1 housing-tenure indicators in a single step
income2012 <- income2012 %>%
  mutate(
    # homeowner: TypeEigendom is "E"
    Ownership = if_else(TypeEigendom == "E", 1, 0),
    # homeowner with a positive mortgage debt (VEHW1210SHYH)
    Mortgage = if_else(VEHW1210SHYH > 0 & TypeEigendom == "E", 1, 0),
    # renter of social housing: TypeEigenaar is "C"
    Social_housing = if_else(TypeEigenaar == "C", 1, 0),
    # renter of non-social housing: TypeEigenaar is "V"
    Non_social_rental_housing = if_else(TypeEigenaar == "V", 1, 0)
  )

# Assemble the 2012 table from the variables that look relevant for now
database_2012 <- income2012 %>%
  select(
    ID, # household ID
    FUA, # Functional Urban Area
    Municipality, # municipality
    VRLVIERKANT100M, # grid cell of 100 m x 100 m
    VRLVIERKANT500M, # grid cell of 500 m x 500 m
    INHAHL, # number of people in the household
    INHBBIHJ, # main source of household income (repeated in the metadata?)
    INHBELIH, # taxable household income
    INHBESTINKH, # net income
    INHBRUTINKH, # gross income
    INHEHALGR, # home ownership and receipt of a rent allowance
    INHGESTINKH, # net income adjusted for household size and composition
    INHPRIMINKH, # primary income (work, assets and own business)
    INHSAMHH, # household composition
    VEHW1000VERH, # total household assets
    VEHW1100BEZH, # household possessions
    VEHW1110FINH, # financial assets
    VEHW1111BANH, # savings and deposits
    VEHW1112EFFH, # bonds and shares
    VEHW1120ONRH, # value of real estate
    VEHW1121WONH, # value of the household's own home
    VEHW1130ONDH, # value of the entrepreneurial capital
    VEHW1140ABEH, # significant interests (more than 5% of a company)
    VEHW1150OVEH, # other household assets (e.g. cash)
    VEHW1200STOH, # household debts
    VEHW1210SHYH, # mortgage debt
    VEHW1220SSTH, # student debt
    VEHW1230SOVH, # other debts
    Ownership, # owns the house where the household lives
    Mortgage, # has a mortgage
    Social_housing, # lives in social housing
    Non_social_rental_housing # lives in private rental housing
  )


# Write the 2012 table to database_2012.csv, without a row-name column
write.csv(database_2012, file = "database_2012.csv", row.names = FALSE)

# Remove all 2012 objects from the workspace to free memory for what follows
rm(income2012, wealth2012, EIGENDOM2012, address_2012, database_2012)




#### Calculations for the year 2011 #### ----------------------------------------

# Read the INHATAB income file for 2011
income2011 <- read_sav("G:/InkomenBestedingen/INHATAB/INHA2011TABV2.sav")

# Build the linking key "ID" by pasting RINPERSOONSHKW and RINPERSOONHKW
# (paste() separates them with a single space)
income2011 <- income2011 %>%
  mutate(ID = paste(RINPERSOONSHKW, RINPERSOONHKW))

# Keep the address records in force on 1 January 2011: move_in before
# 2 January and move_out after 1 January
# NOTE(review): a record whose move_out is exactly 2011-01-01 is dropped by the
# strict ">" below; confirm whether the end date is inclusive in the source
address_2011 <- address %>%
  mutate(move_in = as.Date(as.character(move_in), format = "%Y%m%d")) %>%
  mutate(move_out = as.Date(as.character(move_out), format = "%Y%m%d")) %>%
  filter(
    move_in < as.Date("2011-01-02"),
    move_out > as.Date("2011-01-01")
  )

# Left-merge the 2011 address records on ID (brings in RINOBJECTNUMMER)
income2011 <- merge(
  income2011, address_2011,
  by = "ID", all.x = TRUE
)

# Left-merge the grid table on RINOBJECTNUMMER
income2011 <- merge(
  income2011, grid,
  by = "RINOBJECTNUMMER", all.x = TRUE
)

# Left-merge the municipality table on RINOBJECTNUMMER
income2011 <- merge(
  income2011, municipality,
  by = "RINOBJECTNUMMER", all.x = TRUE
)

# Zero-padding of gem2011 is left disabled; the code is matched as read
#income2011$gem2011 <- sprintf("%04d", income2011$gem2011)

# Left-merge the Functional Urban Area table (gem2011 against FUA$Code)
income2011 <- merge(
  income2011, FUA,
  by.x = "gem2011", by.y = "Code", all.x = TRUE
)

# Read the VEHTAB wealth file for 2011 and give it the same "ID" key
wealth2011 <- read_sav("G:/InkomenBestedingen/VEHTAB/VEH2011TABV2.sav") %>%
  mutate(ID = paste(RINPERSOONSHKW, RINPERSOONHKW))

# Left-merge the wealth data on ID
income2011 <- merge(
  income2011, wealth2011,
  by = "ID", all.x = TRUE
)

# Read the EIGENDOMWOZTAB property file for 2011 (used instead of EIGENDOMTAB
# for this year)
EIGENDOM2011 <- read_sav("G:/BouwenWonen/EIGENDOMWOZTAB/2011/140930 EIGENDOMWOZTAB2011V1.sav")

# Left-merge the property data on RINOBJECTNUMMER
income2011 <- merge(
  income2011, EIGENDOM2011,
  by = "RINOBJECTNUMMER", all.x = TRUE
)

# Keep only rows where VBOEIGENDOM and VBOVERHUURDER are both non-missing and
# VBOVERHUURDER is not "."
income2011 <- subset(
  income2011,
  complete.cases(VBOEIGENDOM, VBOVERHUURDER) & VBOVERHUURDER != "."
)

# Derive the four 0/1 housing-tenure indicators in a single step
income2011 <- income2011 %>%
  mutate(
    # homeowner: VBOEIGENDOM is "E"
    Ownership = if_else(VBOEIGENDOM == "E", 1, 0),
    # homeowner with a positive mortgage debt (VEHW1210SHYH)
    Mortgage = if_else(VEHW1210SHYH > 0 & VBOEIGENDOM == "E", 1, 0),
    # renter of social housing: VBOEIGENDOM is "B", "L" or "S"
    Social_housing = if_else(
      VBOEIGENDOM == "B" | VBOEIGENDOM == "L" | VBOEIGENDOM == "S", 1, 0
    ),
    # renter of non-social housing: VBOEIGENDOM is "O"
    Non_social_rental_housing = if_else(VBOEIGENDOM == "O", 1, 0)
  )

# Assemble the 2011 table from the variables that look relevant for now
database_2011 <- income2011 %>%
  select(
    ID, # household ID
    FUA, # Functional Urban Area
    Municipality, # municipality
    VRLVIERKANT100M, # grid cell of 100 m x 100 m
    VRLVIERKANT500M, # grid cell of 500 m x 500 m
    INHAHL, # number of people in the household
    INHBBIHJ, # main source of household income (repeated in the metadata?)
    INHBELIH, # taxable household income
    INHBESTINKH, # net income
    INHBRUTINKH, # gross income
    INHEHALGR, # home ownership and receipt of a rent allowance
    INHGESTINKH, # net income adjusted for household size and composition
    INHPRIMINKH, # primary income (work, assets and own business)
    INHSAMHH, # household composition
    VEHW1000VERH, # total household assets
    VEHW1100BEZH, # household possessions
    VEHW1110FINH, # financial assets
    VEHW1111BANH, # savings and deposits
    VEHW1112EFFH, # bonds and shares
    VEHW1120ONRH, # value of real estate
    VEHW1121WONH, # value of the household's own home
    VEHW1130ONDH, # value of the entrepreneurial capital
    VEHW1140ABEH, # significant interests (more than 5% of a company)
    VEHW1150OVEH, # other household assets (e.g. cash)
    VEHW1200STOH, # household debts
    VEHW1210SHYH, # mortgage debt
    VEHW1220SSTH, # student debt
    VEHW1230SOVH, # other debts
    Ownership, # owns the house where the household lives
    Mortgage, # has a mortgage
    Social_housing, # lives in social housing
    Non_social_rental_housing # lives in private rental housing
  )


# Write the 2011 table to database_2011.csv, without a row-name column
write.csv(database_2011, file = "database_2011.csv", row.names = FALSE)

# Remove all 2011 objects from the workspace to free memory for what follows
rm(income2011, wealth2011, EIGENDOM2011, address_2011, database_2011)



#### Calculations for the year 2010 #### ----------------------------------------

# Read the household income file for 2010 (HUISHBVRINK, from the
# "INTEGRAAL HUISHOUDENS INKOMEN" folder rather than INHATAB)
income2010 <- read_sav("G:/InkomenBestedingen/INTEGRAAL HUISHOUDENS INKOMEN/2010/HUISHBVRINK2010TABV3.sav")

# Build the linking key "ID" by pasting RINPERSOONSKERN and RINPERSOONKERN
# (paste() separates them with a single space)
# NOTE(review): the wealth key below is built from RINPERSOONSHKW and
# RINPERSOONHKW instead; confirm both pairs identify the same person
income2010 <- income2010 %>%
  mutate(ID = paste(RINPERSOONSKERN, RINPERSOONKERN))

# Keep the address records in force on 1 January 2010: move_in before
# 2 January and move_out after 1 January
# NOTE(review): a record whose move_out is exactly 2010-01-01 is dropped by the
# strict ">" below; confirm whether the end date is inclusive in the source
address_2010 <- address %>%
  mutate(move_in = as.Date(as.character(move_in), format = "%Y%m%d")) %>%
  mutate(move_out = as.Date(as.character(move_out), format = "%Y%m%d")) %>%
  filter(
    move_in < as.Date("2010-01-02"),
    move_out > as.Date("2010-01-01")
  )

# Left-merge the 2010 address records on ID (brings in RINOBJECTNUMMER)
income2010 <- merge(
  income2010, address_2010,
  by = "ID", all.x = TRUE
)

# Left-merge the grid table on RINOBJECTNUMMER
income2010 <- merge(
  income2010, grid,
  by = "RINOBJECTNUMMER", all.x = TRUE
)

# Left-merge the municipality table on RINOBJECTNUMMER
income2010 <- merge(
  income2010, municipality,
  by = "RINOBJECTNUMMER", all.x = TRUE
)

# Zero-padding of gem2010 is left disabled; the code is matched as read
#income2010$gem2010 <- sprintf("%04d", income2010$gem2010)

# Left-merge the Functional Urban Area table (gem2010 against FUA$Code)
income2010 <- merge(
  income2010, FUA,
  by.x = "gem2010", by.y = "Code", all.x = TRUE
)

# Read the VEHTAB wealth file for 2010 and build its "ID" key
wealth2010 <- read_sav("G:/InkomenBestedingen/VEHTAB/VEH2010TABV3.sav") %>%
  mutate(ID = paste(RINPERSOONSHKW, RINPERSOONHKW))

# Left-merge the wealth data on ID
income2010 <- merge(
  income2010, wealth2010,
  by = "ID", all.x = TRUE
)

# Read the EIGENDOMWOZTAB property file for 2010
EIGENDOM2010 <- read_sav("G:/BouwenWonen/EIGENDOMWOZTAB/2010/140930 EIGENDOMWOZTAB2010V1.sav")

# Left-merge the property data on RINOBJECTNUMMER
income2010 <- merge(
  income2010, EIGENDOM2010,
  by = "RINOBJECTNUMMER", all.x = TRUE
)

# Delete missing values
income2010 <- income2010[complete.cases(income2010$VBOEIGENDOM),]
income2010 <- income2010[complete.cases(income2010$VBOVERHUURDER),]
income2010 <- subset(income2010, VBOVERHUURDER != ".")

# Create a variable for homeowners
income2010 <- income2010 %>%
  mutate(Ownership = if_else(VBOEIGENDOM == "E", 1, 0))

# Identify which house-owners have a mortgage
income2010 <- income2010 %>%
  mutate(Mortgage = if_else(VEHW1210SHYH > 0 & VBOEIGENDOM == "E", 1, 0))

# Create a variable for renters of social housing
income2010 <- income2010 %>%
  mutate(Social_housing = if_else(VBOEIGENDOM == "B" | VBOEIGENDOM == "L" | VBOEIGENDOM == "S", 1, 0))

# Create a variable for renters of non-social housing
income2010 <- income2010 %>%
  mutate(Non_social_rental_housing = if_else(VBOEIGENDOM == "O", 1, 0))

# Create a dataframe with what looks relevant (for the moment)
database_2010 <- select(income2010, 
                        ID, # ID
                        FUA, # FUA
                        Municipality, # Municipality
                        VRLVIERKANT100M, # 100 m x 100 m grid cell
                        VRLVIERKANT500M, # 500 m x 500 m grid cell
                        BVRAHL, #number of people in the household
                        BVRBBIHALG1, # main source of household income (it is repeated in the metadata?)
                        BVRBESTINKH, # net income
                        BVRBRUTINKH, # gross income
                        BVREHALG, # home ownership and whether the household receives a rent allowance
                        BVRGESTINKH, # net income adjusted for household size and composition
                        BVRPRIMINKH, # primary income (income from work, assets and own business)
                        BVRSAMHHR, # information about the household composition
                        BVRBBIHJ, # main source of household income (it is repeated in the metadata?)
                        VEHW1000VERH, # Household total assets
                        VEHW1100BEZH, # Household possessions
                        VEHW1110FINH, # Financial assets
                        VEHW1111BANH, # Savings and deposits
                        VEHW1112EFFH, # Bonds and shares
                        VEHW1120ONRH, # value of real estate
                        VEHW1121WONH, # Value of the own owned home
                        VEHW1130ONDH, # value of the entrepreneurial capital
                        VEHW1140ABEH, # value of significant interests of a household (more than 5% of a company)
                        VEHW1150OVEH, # Other household assets (e.g. cash)
                        VEHW1200STOH, # Household debts
                        VEHW1210SHYH, # Mortgage debt
                        VEHW1220SSTH, # Student debt
                        VEHW1230SOVH, # Other debts
                        Ownership, # Ownership of the house where the household lives
                        Mortgage, # Having a mortgage
                        Social_housing, # Living in social housing
                        Non_social_rental_housing, #Living in private rental housing
) 


# Save the data
write.csv(database_2010, "database_2010.csv", row.names = FALSE)

# Delete data from the memory to enable further operations
rm(income2010)
rm(wealth2010)
rm(EIGENDOM2010)
rm(address_2010)
rm(database_2010)



#### Calculations for the year 2009

# load the CBS income database specific for the year we want
income2009 <- read_sav("G:/InkomenBestedingen/INTEGRAAL HUISHOUDENS INKOMEN/2009/HUISHBVRINK2009TABV3.sav")

# Use mutate() to create a new column "ID" by combining the RINPERSOON and RINPERSOONS columns of the databases of interest
income2009 <- mutate(income2009, ID = paste(income2009$RINPERSOONSKERN, income2009$RINPERSOONKERN))

# extract observations from the "address" database corresponding to the year 2009
address_2009 <- address %>%
  mutate(move_in = as.Date(as.character(move_in), format = "%Y%m%d"),
         move_out = as.Date(as.character(move_out), format = "%Y%m%d")) %>%
  filter(move_in < as.Date("2009-01-02") & move_out > as.Date("2009-01-01"))

# Add RINOBJECTNUMMER to the income2009 database
income2009 <- merge(income2009, address_2009, by ="ID", all.x = TRUE, all.y = FALSE)

# Add coordinates to the income2009 database
income2009 <- merge(income2009, grid, by ="RINOBJECTNUMMER", all.x = TRUE, all.y = FALSE)

# Add municipality to the income2009 database
income2009 <- merge(income2009, municipality, by ="RINOBJECTNUMMER", all.x = TRUE, all.y = FALSE)

# make gemeente codes be consistent with the FUA file
#income2009$gem2009 <- sprintf("%04d", income2009$gem2009)

# merge the databases with the Functional Urban Area information
income2009 <- merge(income2009, FUA, by.x ="gem2009", by.y = "Code", all.x = TRUE, all.y = FALSE)

# load VEHTAB data
wealth2009 <- read_sav("G:/InkomenBestedingen/VEHTAB/VEH2009TABV3.sav")

# Add an ID variable to the wealth database
wealth2009 <- mutate(wealth2009, ID = paste(wealth2009$RINPERSOONSHKW, wealth2009$RINPERSOONHKW))

# merge the income data with the wealth data
income2009 <- merge(income2009, wealth2009, by = "ID", all.x = TRUE, all.y = FALSE)

# load EIGENDOMWOZTAB data
EIGENDOM2009 <- read_sav("G:/BouwenWonen/EIGENDOMWOZTAB/2009/140930 EIGENDOMWOZTAB2009V1.sav")

# merge the income and wealth data with the property data
income2009 <- merge(income2009, EIGENDOM2009, by = "RINOBJECTNUMMER", all.x = TRUE, all.y = FALSE)

# Delete missing values
income2009 <- income2009[complete.cases(income2009$VBOEIGENDOM),]
income2009 <- income2009[complete.cases(income2009$VBOVERHUURDER),]
income2009 <- subset(income2009, VBOVERHUURDER != ".")

# Create a variable for homeowners
income2009 <- income2009 %>%
  mutate(Ownership = if_else(VBOEIGENDOM == "E", 1, 0))

# Identify which house-owners have a mortgage
income2009 <- income2009 %>%
  mutate(Mortgage = if_else(VEHW1210SHYH > 0 & VBOEIGENDOM == "E", 1, 0))

# Create a variable for renters of social housing
income2009 <- income2009 %>%
  mutate(Social_housing = if_else(VBOEIGENDOM == "B" | VBOEIGENDOM == "L" | VBOEIGENDOM == "S", 1, 0))

# Create a variable for renters of non-social housing
income2009 <- income2009 %>%
  mutate(Non_social_rental_housing = if_else(VBOEIGENDOM == "O", 1, 0))

# Create a dataframe with what looks relevant (for the moment)
database_2009 <- select(income2009, 
                        ID, # ID
                        FUA, # FUA
                        Municipality, # Municipality
                        VRLVIERKANT100M, # 100 m x 100 m grid cell
                        VRLVIERKANT500M, # 500 m x 500 m grid cell
                        BVRAHL, #number of people in the household
                        BVRBBIHALG1, # main source of household income (it is repeated in the metadata?)
                        BVRBESTINKH, # net income
                        BVRBRUTINKH, # gross income
                        BVREHALG, # home ownership and whether the household receives a rent allowance
                        BVRGESTINKH, # net income adjusted for household size and composition
                        BVRPRIMINKH, # primary income (income from work, assets and own business)
                        BVRSAMHHR, # information about the household composition
                        BVRBBIHJ, # main source of household income (it is repeated in the metadata?)
                        VEHW1000VERH, # Household total assets
                        VEHW1100BEZH, # Household possessions
                        VEHW1110FINH, # Financial assets
                        VEHW1111BANH, # Savings and deposits
                        VEHW1112EFFH, # Bonds and shares
                        VEHW1120ONRH, # value of real estate
                        VEHW1121WONH, # Value of the own owned home
                        VEHW1130ONDH, # value of the entrepreneurial capital
                        VEHW1140ABEH, # value of significant interests of a household (more than 5% of a company)
                        VEHW1150OVEH, # Other household assets (e.g. cash)
                        VEHW1200STOH, # Household debts
                        VEHW1210SHYH, # Mortgage debt
                        VEHW1220SSTH, # Student debt
                        VEHW1230SOVH, # Other debts
                        Ownership, # Ownership of the house where the household lives
                        Mortgage, # Having a mortgage
                        Social_housing, # Living in social housing
                        Non_social_rental_housing, #Living in private rental housing
) 


# Save the data
write.csv(database_2009, "database_2009.csv", row.names = FALSE)

# Delete data from the memory to enable further operations
rm(income2009)
rm(wealth2009)
rm(EIGENDOM2009)
rm(address_2009)
rm(database_2009)



#### Calculations for the year 2008

# load the CBS income database specific for the year we want
income2008 <- read_sav("G:/InkomenBestedingen/INTEGRAAL HUISHOUDENS INKOMEN/2008/HUISHBVRINK2008TABV3.sav")

# Use mutate() to create a new column "ID" by combining the RINPERSOON and RINPERSOONS columns of the databases of interest
income2008 <- mutate(income2008, ID = paste(income2008$RINPERSOONSKERN, income2008$RINPERSOONKERN))

# extract observations from the "address" database corresponding to the year 2008
address_2008 <- address %>%
  mutate(move_in = as.Date(as.character(move_in), format = "%Y%m%d"),
         move_out = as.Date(as.character(move_out), format = "%Y%m%d")) %>%
  filter(move_in < as.Date("2008-01-02") & move_out > as.Date("2008-01-01"))

# Add RINOBJECTNUMMER to the income2008 database
income2008 <- merge(income2008, address_2008, by ="ID", all.x = TRUE, all.y = FALSE)

# Add coordinates to the income2008 database
income2008 <- merge(income2008, grid, by ="RINOBJECTNUMMER", all.x = TRUE, all.y = FALSE)

# Add municipality to the income2008 database
income2008 <- merge(income2008, municipality, by ="RINOBJECTNUMMER", all.x = TRUE, all.y = FALSE)

# make gemeente codes be consistent with the FUA file
#income2008$gem2008 <- sprintf("%04d", income2008$gem2008)

# merge the databases with the Functional Urban Area information
income2008 <- merge(income2008, FUA, by.x ="gem2008", by.y = "Code", all.x = TRUE, all.y = FALSE)

# load VEHTAB data
wealth2008 <- read_sav("G:/InkomenBestedingen/VEHTAB/VEH2008TABV3.sav")

# Add an ID variable to the wealth database
wealth2008 <- mutate(wealth2008, ID = paste(wealth2008$RINPERSOONSHKW, wealth2008$RINPERSOONHKW))

# merge the income data with the wealth data
income2008 <- merge(income2008, wealth2008, by = "ID", all.x = TRUE, all.y = FALSE)

# load EIGENDOMWOZTAB data
EIGENDOM2008 <- read_sav("G:/BouwenWonen/EIGENDOMWOZTAB/2008/140930 EIGENDOMWOZTAB2008V1.sav")

# merge the income and wealth data with the property data
income2008 <- merge(income2008, EIGENDOM2008, by = "RINOBJECTNUMMER", all.x = TRUE, all.y = FALSE)

# Delete missing values
income2008 <- income2008[complete.cases(income2008$VBOEIGENDOM),]
income2008 <- income2008[complete.cases(income2008$VBOVERHUURDER),]
income2008 <- subset(income2008, VBOVERHUURDER != ".")

# Create a variable for homeowners
income2008 <- income2008 %>%
  mutate(Ownership = if_else(VBOEIGENDOM == "E", 1, 0))

# Identify which house-owners have a mortgage
income2008 <- income2008 %>%
  mutate(Mortgage = if_else(VEHW1210SHYH > 0 & VBOEIGENDOM == "E", 1, 0))

# Create a variable for renters of social housing
income2008 <- income2008 %>%
  mutate(Social_housing = if_else(VBOEIGENDOM == "B" | VBOEIGENDOM == "L" | VBOEIGENDOM == "S", 1, 0))

# Create a variable for renters of non-social housing
income2008 <- income2008 %>%
  mutate(Non_social_rental_housing = if_else(VBOEIGENDOM == "O", 1, 0))

# Create a dataframe with what looks relevant (for the moment)
database_2008 <- select(income2008, 
                        ID, # ID
                        FUA, # FUA
                        Municipality, # Municipality
                        VRLVIERKANT100M, # 100 m x 100 m grid cell
                        VRLVIERKANT500M, # 500 m x 500 m grid cell
                        BVRAHL, #number of people in the household
                        BVRBBIHALG1, # main source of household income (it is repeated in the metadata?)
                        BVRBESTINKH, # net income
                        BVRBRUTINKH, # gross income
                        BVREHALG, # home ownership and whether the household receives a rent allowance
                        BVRGESTINKH, # net income adjusted for household size and composition
                        BVRPRIMINKH, # primary income (income from work, assets and own business)
                        BVRSAMHHR, # information about the household composition
                        BVRBBIHJ, # main source of household income (it is repeated in the metadata?)
                        VEHW1000VERH, # Household total assets
                        VEHW1100BEZH, # Household possessions
                        VEHW1110FINH, # Financial assets
                        VEHW1111BANH, # Savings and deposits
                        VEHW1112EFFH, # Bonds and shares
                        VEHW1120ONRH, # value of real estate
                        VEHW1121WONH, # Value of the own owned home
                        VEHW1130ONDH, # value of the entrepreneurial capital
                        VEHW1140ABEH, # value of significant interests of a household (more than 5% of a company)
                        VEHW1150OVEH, # Other household assets (e.g. cash)
                        VEHW1200STOH, # Household debts
                        VEHW1210SHYH, # Mortgage debt
                        VEHW1220SSTH, # Student debt
                        VEHW1230SOVH, # Other debts
                        Ownership, # Ownership of the house where the household lives
                        Mortgage, # Having a mortgage
                        Social_housing, # Living in social housing
                        Non_social_rental_housing, #Living in private rental housing
) 


# Save the data
write.csv(database_2008, "database_2008.csv", row.names = FALSE)

# Delete data from the memory to enable further operations
rm(income2008)
rm(wealth2008)
rm(EIGENDOM2008)
rm(address_2008)
rm(database_2008)




#### Calculations for the year 2007

# load the CBS income database specific for the year we want
income2007 <- read_sav("G:/InkomenBestedingen/INTEGRAAL HUISHOUDENS INKOMEN/2007/HUISHBVRINK2007TABV3.sav")

# Use mutate() to create a new column "ID" by combining the RINPERSOON and RINPERSOONS columns of the databases of interest
income2007 <- mutate(income2007, ID = paste(income2007$RINPERSOONSKERN, income2007$RINPERSOONKERN))

# extract observations from the "address" database corresponding to the year 2007
address_2007 <- address %>%
  mutate(move_in = as.Date(as.character(move_in), format = "%Y%m%d"),
         move_out = as.Date(as.character(move_out), format = "%Y%m%d")) %>%
  filter(move_in < as.Date("2007-01-02") & move_out > as.Date("2007-01-01"))

# Add RINOBJECTNUMMER to the income2007 database
income2007 <- merge(income2007, address_2007, by ="ID", all.x = TRUE, all.y = FALSE)

# Add coordinates to the income2007 database
income2007 <- merge(income2007, grid, by ="RINOBJECTNUMMER", all.x = TRUE, all.y = FALSE)

# Add municipality to the income2007 database
income2007 <- merge(income2007, municipality, by ="RINOBJECTNUMMER", all.x = TRUE, all.y = FALSE)

# make gemeente codes be consistent with the FUA file
#income2007$gem2007 <- sprintf("%04d", income2007$gem2007)

# merge the databases with the Functional Urban Area information
income2007 <- merge(income2007, FUA, by.x ="gem2007", by.y = "Code", all.x = TRUE, all.y = FALSE)

# load VEHTAB data
wealth2007 <- read_sav("G:/InkomenBestedingen/VEHTAB/VEH2007TABV3.sav")

# Add an ID variable to the wealth database
wealth2007 <- mutate(wealth2007, ID = paste(wealth2007$RINPERSOONSHKW, wealth2007$RINPERSOONHKW))

# merge the income data with the wealth data
income2007 <- merge(income2007, wealth2007, by = "ID", all.x = TRUE, all.y = FALSE)

# load EIGENDOMWOZTAB data
EIGENDOM2007 <- read_sav("G:/BouwenWonen/EIGENDOMWOZTAB/2007/140930 EIGENDOMWOZTAB2007V1.sav")

# merge the income and wealth data with the property data
income2007 <- merge(income2007, EIGENDOM2007, by = "RINOBJECTNUMMER", all.x = TRUE, all.y = FALSE)

# Delete missing values
income2007 <- income2007[complete.cases(income2007$VBOEIGENDOM),]
income2007 <- income2007[complete.cases(income2007$VBOVERHUURDER),]
income2007 <- subset(income2007, VBOVERHUURDER != ".")

# Create a variable for homeowners
income2007 <- income2007 %>%
  mutate(Ownership = if_else(VBOEIGENDOM == "E", 1, 0))

# Identify which house-owners have a mortgage
income2007 <- income2007 %>%
  mutate(Mortgage = if_else(VEHW1210SHYH > 0 & VBOEIGENDOM == "E", 1, 0))

# Create a variable for renters of social housing
income2007 <- income2007 %>%
  mutate(Social_housing = if_else(VBOEIGENDOM == "B" | VBOEIGENDOM == "L" | VBOEIGENDOM == "S", 1, 0))

# Create a variable for renters of non-social housing
income2007 <- income2007 %>%
  mutate(Non_social_rental_housing = if_else(VBOEIGENDOM == "O", 1, 0))

# Create a dataframe with what looks relevant (for the moment)
database_2007 <- select(income2007, 
                        ID, # ID
                        FUA, # FUA
                        Municipality, # Municipality
                        VRLVIERKANT100M, # 100 m x 100 m grid cell
                        VRLVIERKANT500M, # 500 m x 500 m grid cell
                        BVRAHL, #number of people in the household
                        BVRBBIHALG1, # main source of household income (it is repeated in the metadata?)
                        BVRBESTINKH, # net income
                        BVRBRUTINKH, # gross income
                        BVREHALG, # home ownership and whether the household receives a rent allowance
                        BVRGESTINKH, # net income adjusted for household size and composition
                        BVRPRIMINKH, # primary income (income from work, assets and own business)
                        BVRSAMHHR, # information about the household composition
                        BVRBBIHJ, # main source of household income (it is repeated in the metadata?)
                        VEHW1000VERH, # Household total assets
                        VEHW1100BEZH, # Household possessions
                        VEHW1110FINH, # Financial assets
                        VEHW1111BANH, # Savings and deposits
                        VEHW1112EFFH, # Bonds and shares
                        VEHW1120ONRH, # value of real estate
                        VEHW1121WONH, # Value of the own owned home
                        VEHW1130ONDH, # value of the entrepreneurial capital
                        VEHW1140ABEH, # value of significant interests of a household (more than 5% of a company)
                        VEHW1150OVEH, # Other household assets (e.g. cash)
                        VEHW1200STOH, # Household debts
                        VEHW1210SHYH, # Mortgage debt
                        VEHW1220SSTH, # Student debt
                        VEHW1230SOVH, # Other debts
                        Ownership, # Ownership of the house where the household lives
                        Mortgage, # Having a mortgage
                        Social_housing, # Living in social housing
                        Non_social_rental_housing, #Living in private rental housing
) 


# Save the data
write.csv(database_2007, "database_2007.csv", row.names = FALSE)

# Delete data from the memory to enable further operations
rm(income2007)
rm(wealth2007)
rm(EIGENDOM2007)
rm(address_2007)
rm(database_2007)




#### Calculations for the year 2006

# load the CBS income database specific for the year we want
income2006 <- read_sav("G:/InkomenBestedingen/INTEGRAAL HUISHOUDENS INKOMEN/2006/HUISHBVRINK2006TABV3.sav")

# Use mutate() to create a new column "ID" by combining the RINPERSOON and RINPERSOONS columns of the databases of interest
income2006 <- mutate(income2006, ID = paste(income2006$RINPERSOONSKERN, income2006$RINPERSOONKERN))

# extract observations from the "address" database corresponding to the year 2006
address_2006 <- address %>%
  mutate(move_in = as.Date(as.character(move_in), format = "%Y%m%d"),
         move_out = as.Date(as.character(move_out), format = "%Y%m%d")) %>%
  filter(move_in < as.Date("2006-01-02") & move_out > as.Date("2006-01-01"))

# Add RINOBJECTNUMMER to the income2006 database
income2006 <- merge(income2006, address_2006, by ="ID", all.x = TRUE, all.y = FALSE)

# Add coordinates to the income2006 database
income2006 <- merge(income2006, grid, by ="RINOBJECTNUMMER", all.x = TRUE, all.y = FALSE)

# Add municipality to the income2006 database
income2006 <- merge(income2006, municipality, by ="RINOBJECTNUMMER", all.x = TRUE, all.y = FALSE)

# make gemeente codes be consistent with the FUA file
#income2006$gem2006 <- sprintf("%04d", income2006$gem2006)

# merge the databases with the Functional Urban Area information
income2006 <- merge(income2006, FUA, by.x ="gem2006", by.y = "Code", all.x = TRUE, all.y = FALSE)

# load VEHTAB data
wealth2006 <- read_sav("G:/InkomenBestedingen/VEHTAB/VEH2006TABV3.sav")

# Add an ID variable to the wealth database
wealth2006 <- mutate(wealth2006, ID = paste(wealth2006$RINPERSOONSHKW, wealth2006$RINPERSOONHKW))

# merge the income data with the wealth data
income2006 <- merge(income2006, wealth2006, by = "ID", all.x = TRUE, all.y = FALSE)

# load EIGENDOMWOZTAB data
EIGENDOM2006 <- read_sav("G:/BouwenWonen/EIGENDOMWOZTAB/2006/140930 EIGENDOMWOZTAB2006V1.sav")

# merge the income and wealth data with the property data
income2006 <- merge(income2006, EIGENDOM2006, by = "RINOBJECTNUMMER", all.x = TRUE, all.y = FALSE)

# Delete missing values
income2006 <- income2006[complete.cases(income2006$VBOEIGENDOM),]
income2006 <- income2006[complete.cases(income2006$VBOVERHUURDER),]
income2006 <- subset(income2006, VBOVERHUURDER != ".")

# Create a variable for homeowners
income2006 <- income2006 %>%
  mutate(Ownership = if_else(VBOEIGENDOM == "E", 1, 0))

# Identify which house-owners have a mortgage
income2006 <- income2006 %>%
  mutate(Mortgage = if_else(VEHW1210SHYH > 0 & VBOEIGENDOM == "E", 1, 0))

# Create a variable for renters of social housing
income2006 <- income2006 %>%
  mutate(Social_housing = if_else(VBOEIGENDOM == "B" | VBOEIGENDOM == "L" | VBOEIGENDOM == "S", 1, 0))

# Create a variable for renters of non-social housing
income2006 <- income2006 %>%
  mutate(Non_social_rental_housing = if_else(VBOEIGENDOM == "O", 1, 0))

# Create a dataframe with what looks relevant (for the moment)
database_2006 <- select(income2006, 
                        ID, # ID
                        FUA, # FUA
                        Municipality, # Municipality
                        VRLVIERKANT100M, # 100 m x 100 m grid cell
                        VRLVIERKANT500M, # 500 m x 500 m grid cell
                        BVRAHL, #number of people in the household
                        BVRBBIHALG1, # main source of household income (it is repeated in the metadata?)
                        BVRBESTINKH, # net income
                        BVRBRUTINKH, # gross income
                        BVREHALG, # home ownership and whether the household receives a rent allowance
                        BVRGESTINKH, # net income adjusted for household size and composition
                        BVRPRIMINKH, # primary income (income from work, assets and own business)
                        BVRSAMHHR, # information about the household composition
                        BVRBBIHJ, # main source of household income (it is repeated in the metadata?)
                        VEHW1000VERH, # Household total assets
                        VEHW1100BEZH, # Household possessions
                        VEHW1110FINH, # Financial assets
                        VEHW1111BANH, # Savings and deposits
                        VEHW1112EFFH, # Bonds and shares
                        VEHW1120ONRH, # value of real estate
                        VEHW1121WONH, # Value of the own owned home
                        VEHW1130ONDH, # value of the entrepreneurial capital
                        VEHW1140ABEH, # value of significant interests of a household (more than 5% of a company)
                        VEHW1150OVEH, # Other household assets (e.g. cash)
                        VEHW1200STOH, # Household debts
                        VEHW1210SHYH, # Mortgage debt
                        VEHW1220SSTH, # Student debt
                        VEHW1230SOVH, # Other debts
                        Ownership, # Ownership of the house where the household lives
                        Mortgage, # Having a mortgage
                        Social_housing, # Living in social housing
                        Non_social_rental_housing, #Living in private rental housing
) 


# Save the data
write.csv(database_2006, "database_2006.csv", row.names = FALSE)

# Delete data from the memory to enable further operations
rm(income2006)
rm(wealth2006)
rm(EIGENDOM2006)
rm(address_2006)
rm(database_2006)