
#'     THIS script
#'     
#'     contains the code to analyze some general system characteristics

#     > sources of income
#     > main crops

#######################################################################################################
library(dplyr)

# WHAT ARE THE MAIN SOURCES OF INCOME FOR RESPONDENTS IN BIRERE AND RUGAAGA

# import the income-sources data (A4_income_sources_anom.csv)
income <- read.csv("./data_analysis/data/raw/A4_income_sources_anom.csv",
                   header = TRUE)

# import the household characteristics (A1_hh_characteristics_anom.csv) and
# keep only the respondent identifier and the sub-county they belong to
hh_char <- read.csv("./data_analysis/data/raw/A1_hh_characteristics_anom.csv",
                    header = TRUE)
sub_county <- hh_char[, c("Name", "Sub.county")]

#' attach the sub-county to every income record, matching on "Name".
#' merge() performs an inner join by default, so respondents that are missing
#' from either file are dropped here.
income <- merge(sub_county, income, by = "Name")



#' During the first few interviews, respondents were asked for the absolute
#' percentage of total income contributed by each income source.
#' This changed later: because respondents had trouble using absolute
#' percentages, the income sources were asked about in a more qualitative way.
#' The importance of each source of income was then recorded as follows:
#'
#'  1 =  All or nearly all     (87-100%)
#'  2 =  More than half of it  (63-86%)
#'  3 =  About half of it      (38-62%)
#'  4 =  Less than half of it  (13-37%)
#'  5 =  A small amount        (1-12%)
#'
#' For respondents who answered in absolute percentages, the answer is
#' converted to the classes defined above. A value of 0 is kept as its own
#' class "0".
#'
#' NOTE(review): the values 1-5 are always read as class codes, so an absolute
#' answer of 1-5 % cannot be told apart from a code -- confirm against the
#' raw interview sheets.
#'
# assumes per_ban_sales is among the first four columns of the merged
# data frame -- TODO confirm
income_bananas <- subset(income, select = c(1:4))

# The label spellings (including the double space in "1 -  12") are kept as
# they were, because later code may match on them. The only label changed is
# the stray "38 -  62", which used to create a second, separate "about half"
# class next to "38 - 62".
# Rows matching none of the conditions (NA, negative values, non-integer
# values below 5) get class NA.
income_bananas <- mutate(
  income_bananas,
  class = case_when(
    # no income from bananas
    per_ban_sales == 0 ~ "0",
    # qualitative class codes
    per_ban_sales == 1 ~ "87 - 100",
    per_ban_sales == 2 ~ "63 - 86",
    per_ban_sales == 3 ~ "38 - 62",
    per_ban_sales == 4 ~ "13 - 37",
    per_ban_sales == 5 ~ "1 -  12",
    # absolute percentages; the upper bound of each class is exclusive of the
    # next class's lower bound, so 37 falls in "13 - 37" and 86 in "63 - 86"
    per_ban_sales > 5 & per_ban_sales < 13 ~ "1 -  12",
    per_ban_sales >= 13 & per_ban_sales < 38 ~ "13 - 37",
    per_ban_sales >= 38 & per_ban_sales < 63 ~ "38 - 62",
    per_ban_sales >= 63 & per_ban_sales < 87 ~ "63 - 86",
    per_ban_sales >= 87 ~ "87 - 100"
  )
)

# Tally the respondents in every (class, sub-county) combination, then express
# each tally as a percentage of all respondents tallied in that sub-county.
# The result stays grouped by Sub.county.
income_bananas <- income_bananas %>%
  count(class, Sub.county, name = "count") %>%
  group_by(Sub.county) %>%
  mutate(percentage = count / sum(count) * 100)

#' % of farmers for whom bananas are the main source of income, i.e. those who
#' report that banana sales represent more than half of their total household
#' income (the two highest classes).
income_bananas[["class"]] <- as.factor(income_bananas[["class"]])

income_bananas <- filter(income_bananas, class %in% c("63 - 86", "87 - 100"))
