#'
#'
#' This script contains the code to analyze how much manure banana farmers in Birere and Rugaaga
#' are applying on their fields. 
#' 
#' It contains the following steps
#'  1) Manure application per field is calculated in kg fresh weight (FW)/ha/yr
#'     and converted to kg dry matter (DM)/ha/yr
#'  2) Manure use classes are defined (e.g. 0, 0-2, 2-5 t DM/ha/yr)
#'  3) It is investigated which % of the total surveyed banana area falls in each of the classes
#'  4) It is investigated which % of the respondents falls in each of the classes
#' 
#' 

library(plyr)
library(dplyr)
library(ggplot2)
library(reshape2)
#library(ggpubr)

source("./data_analysis/scripts/my_figure_theme.R")

# Import the raw (anonymised) survey tables. All paths are relative, so the
# script has to be run from the project root.
hh_characteristics <- read.csv(
  "./data_analysis/data/raw/A1_hh_characteristics_anom.csv",
  header = TRUE
)
inputs_per_field <- read.csv(
  "./data_analysis/data/raw/B1_inputs_per_field_anom.csv",
  header = TRUE
)
input_application_frequency <- read.csv(
  "./data_analysis/data/raw/B5_input_application_frequency_anom.csv",
  header = TRUE
)
input_quantity <- read.csv(
  "./data_analysis/data/raw/B6_input_quantities_anom.csv",
  header = TRUE
)
field_characteristics <- read.csv(
  "./data_analysis/data/raw/B9_field_characteristics_anom.csv",
  header = TRUE
)

# Lookup table: respondent id ("Name") and the sub-county of the household.
sub_county <- hh_characteristics[, c("Name", "Sub.county"), drop = FALSE]

# Respondent 76 (Rugaaga) is removed manually from the sub-county lookup.
# NOTE(review): the reason for this exclusion is not documented here.
sub_county <- sub_county[sub_county$Name != 76, ]

# Column names to lower case, with "." replaced by "_".
colnames(sub_county) <- gsub(".", "_", tolower(colnames(sub_county)), fixed = TRUE)


#' Tidy one of the raw per-field input tables.
#'
#' Lower-cases all column names and replaces "." by "_", drops the rows whose
#' `field_id` equals "No banana field" (rows with a missing `field_id` are
#' dropped as well), and makes `field_id` unique across respondents by
#' prefixing it with the respondent's `name`.
#'
#' @param input_dataframe Data frame that, after its column names have been
#'   normalised, contains the columns `name` and `field_id`.
#' @return The tidied data frame.
input_tidyer <- function(input_dataframe) {
  # normalise the column names: lower case, "." -> "_"
  names(input_dataframe) <- gsub(".", "_", tolower(names(input_dataframe)),
                                 fixed = TRUE)

  # keep banana fields only; a comparison that yields NA counts as "drop"
  is_banana_field <- input_dataframe$field_id != "No banana field"
  input_dataframe <- input_dataframe[is_banana_field & !is.na(is_banana_field), ,
                                     drop = FALSE]

  # field ids are only unique within a respondent: prefix the respondent name
  input_dataframe$field_id <- paste(input_dataframe$name,
                                    input_dataframe$field_id)

  input_dataframe
}

# Tidy every per-field table in the same way (see input_tidyer()).
inputs_per_field <- input_tidyer(inputs_per_field)
input_application_frequency <- input_tidyer(input_application_frequency)
input_quantity <- input_tidyer(input_quantity)
field_characteristics <- input_tidyer(field_characteristics)

# Combine everything into one table with full outer joins, so that no field
# and no respondent is lost when a table lacks an entry for it.
# The order of the `by` columns is kept exactly as it is: merge() puts the key
# columns first, in that order, and column positions are used further down.
all_field_info <- sub_county %>%
  merge(inputs_per_field, by = "name", all = TRUE) %>%
  merge(input_application_frequency,
        by = c("name", "field_id", "nr_of_banana_fields"), all = TRUE) %>%
  merge(input_quantity,
        by = c("name", "field_id", "nr_of_banana_fields"), all = TRUE) %>%
  merge(field_characteristics,
        by = c("field_id", "name", "nr_of_banana_fields"), all = TRUE)

# The separate tables and the helper are no longer needed.
rm(field_characteristics, hh_characteristics, sub_county,
   input_application_frequency, input_quantity, input_tidyer,
   inputs_per_field)

# field_size holds the measured area in acres: convert it to hectares and
# rename the column accordingly.
# NOTE(review): 0.4046 is a truncated factor (1 acre is about 0.404686 ha);
# kept as is so that results do not change.
all_field_info <- all_field_info %>%
  dplyr::mutate(field_size = field_size * 0.4046) %>%
  dplyr::rename(field_size_ha = field_size)

# Keep only the columns needed to calculate the manure input per ha.
# Columns are picked by position, so this depends on the column order produced
# by the merges above.
manure_input_field <- all_field_info[, c(1, 2, 4, 5, 9, 10, 17:21, 52, 53, 69),
                                     drop = FALSE]

# Number of manure applications per year as a number; entries that are not
# numeric (e.g. "not frequent") become NA.
manure_input_field$time_year <-
  as.numeric(as.character(manure_input_field$cow_dung_times_y))

# Check which units are used for the manure quantities.
# unique() is used instead of levels(): levels() returns NULL for character
# columns, which is what read.csv() produces by default since R 4.0.
unique(manure_input_field$manure_unit)

# Conversion of the reported units to kg fresh weight (FW)
# >> see docs//Banana agronomy//data//rangelands manure//overview_nutrient_content_prices
# for justifications
#
#   basin              =   11.00 kg
#   wheelbarrow        =   21.45 kg
#   elf                = 1978    kg
#   fuso and forward   = 6875    kg
#   wheel barrows/mat  =   21.45 kg * 1600 * field_size_ha
#                        (1600 is presumably the number of mats per ha -
#                         TODO confirm)

# Total amount of manure (kg FW) applied on each field per application.
# Rows with a unit that is not listed below (or with a missing unit) get NA.
manure_input_field <- dplyr::mutate(
  manure_input_field,
  total_cow_dung_FW = dplyr::case_when(
    manure_unit == "basin"                ~ 11 * manure_count,
    manure_unit == "wheelbarrow"          ~ 21.45 * manure_count,
    manure_unit == "wheel barrows/mat"    ~ 21.45 * manure_count * 1600 * field_size_ha,
    manure_unit == "elf"                  ~ 1978 * manure_count,
    manure_unit %in% c("fuso", "forward") ~ 6875 * manure_count
  )
)

# Express the manure application per hectare and per year:
#   cow_dung_ha_FW     kg FW/ha per application
#   cow_dung_ha_FW_yr  kg FW/ha/year (multiplied by the applications per year)
#   cow_dung_ha_DM_yr  kg dry matter/ha/year
# FW to DM conversion is *0.55 (source: measurements Godfrey
# >> see docs//Banana agronomy//data//rangelands manure//overview_nutrient_content_prices)
manure_input_field <- manure_input_field %>%
  dplyr::mutate(
    cow_dung_ha_FW    = total_cow_dung_FW / field_size_ha,
    cow_dung_ha_FW_yr = cow_dung_ha_FW * time_year,
    cow_dung_ha_DM_yr = cow_dung_ha_FW_yr * 0.55
  )


# Add a column that explains the value of cow_dung_ha_DM_yr for each field.
# The first matching rule wins:
#   "nf"                     application frequency unknown or not frequent
#   "frequent application"   a positive yearly rate could be calculated
#   "no manure application"  the respondent does not apply manure
#   "no field size info"     the field size is missing
# Fields that match none of the rules get NA.
manure_input_field <- dplyr::mutate(
  manure_input_field,
  explanation = dplyr::case_when(
    cow_dung_times_y == "no information" ~ "nf",
    cow_dung_times_y == "not frequent"   ~ "nf",
    cow_dung_ha_FW_yr > 0                ~ "frequent application",
    cow_dung_y_n == "n"                  ~ "no manure application",
    is.na(field_size_ha)                 ~ "no field size info"
  )
)

manure_input_field$explanation <- as.factor(manure_input_field$explanation)

# If manure is not applied, set cow_dung_ha_DM_yr to 0 and leave every other
# row untouched. %in% is used instead of == because == yields NA for fields
# without an explanation, and if_else() would then replace their (possibly
# valid) rate by NA.
manure_input_field$cow_dung_ha_DM_yr <- dplyr::if_else(
  manure_input_field$explanation %in% "no manure application",
  0,
  manure_input_field$cow_dung_ha_DM_yr
)

# From here on only the identifying columns, the field size, the yearly dry
# matter rate and its explanation are needed.
manure_input_field <- manure_input_field[
  , c("field_id", "name", "sub_county", "field_size_ha",
      "cow_dung_ha_DM_yr", "explanation"),
  drop = FALSE
]

# Fields with a positive application rate (fields with a rate of 0 or NA are
# left out), with the rate also expressed in t DM/ha/year.
selection_manure_input_field <- subset(manure_input_field, cow_dung_ha_DM_yr > 0)
selection_manure_input_field[["cow_dung_ha_ton_DM_yr"]] <-
  selection_manure_input_field[["cow_dung_ha_DM_yr"]] / 1000

# Exploratory boxplot of the application rates per sub-county, made before the
# outliers are removed. The y axis is limited to 0-25 t DM/ha/year.
# The object is only stored here, not printed.
plot_abs_quant <- ggplot(
  selection_manure_input_field,
  aes(x = sub_county, y = cow_dung_ha_ton_DM_yr)
) +
  geom_boxplot() +
  ylim(0, 25) +
  labs(x = "", y = "manure application (t DM/ha/year)") +
  my_figure_theme

# Remove outliers with unlikely high application rates, i.e. 100 t DM/ha/year
# (100000 kg) or more.
selection_manure_input_field <- subset(selection_manure_input_field,
                                       cow_dung_ha_DM_yr < 100000)

# Same filter for the complete table, but here fields without a rate (NA) are
# kept: they are needed later for the "nf" class.
manure_input_field <- manure_input_field[
  is.na(manure_input_field$cow_dung_ha_DM_yr) |
    manure_input_field$cow_dung_ha_DM_yr < 100000,
  ,
  drop = FALSE
]


# Assign every field to a manure use class (t DM/ha/year). The rules are
# checked from top to bottom; fields without a rate only reach the last rule,
# which labels the infrequent / unknown applications as "nf". Fields that match
# no rule get NA. The class is stored as a factor so that the classes are
# plotted in a meaningful order.
manure_input_field <- manure_input_field %>%
  dplyr::mutate(
    class = dplyr::case_when(
      cow_dung_ha_DM_yr == 0     ~ "0",
      cow_dung_ha_DM_yr <= 2000  ~ "0-2",
      cow_dung_ha_DM_yr <= 5000  ~ "2-5",
      cow_dung_ha_DM_yr <= 10000 ~ "5-10",
      cow_dung_ha_DM_yr <= 20000 ~ "10-20",
      cow_dung_ha_DM_yr >  20000 ~ "20+",
      explanation == "nf"        ~ "nf"
    ),
    class = factor(class,
                   levels = c("0", "nf", "0-2", "2-5", "5-10", "10-20", "20+"))
  )
                      
#' Aggregate one or more variables per group.
#'
#' Thin wrapper around stats::aggregate() that takes column names, always
#' passes `na.rm = TRUE` to the summary function and labels the result.
#' Rows with a missing value in one of the grouping columns are dropped by
#' aggregate().
#'
#' @param data Data frame that holds the columns named in `VOIs` and `GOIs`.
#' @param VOIs Character vector with the names of the variables to summarise.
#' @param GOIs Character vector with the names of the grouping columns.
#' @param FUNC Name of the summary function (a string, e.g. 'mean', 'sum',
#'   'quantile'); the function has to accept `na.rm`.
#' @param PROB Passed on as `probs` (for 'quantile'); "" means not used.
#'   May hold more than one probability.
#' @return Data frame with the column `FUNCTION` (function name and
#'   probabilities pasted together), followed by the grouping columns and the
#'   summarised variables, named after `GOIs` and `VOIs`.
aggregator <- function(data, VOIs, GOIs, FUNC = 'mean', PROB = "") {
  if (length(VOIs) == 0 || length(GOIs) == 0) {
    stop("'VOIs' and 'GOIs' must each name at least one column", call. = FALSE)
  }
  unknown_cols <- setdiff(c(VOIs, GOIs), names(data))
  if (length(unknown_cols) > 0) {
    stop("column(s) not found in 'data': ",
         paste(unknown_cols, collapse = ", "), call. = FALSE)
  }

  # unnamed lists, so that aggregate() generates its own column names; they
  # are replaced by GOIs / VOIs at the end
  VAR_list <- unname(lapply(VOIs, function(col) data[[col]]))
  GRP_list <- unname(lapply(GOIs, function(col) data[[col]]))

  # identical() instead of ==: PROB may be a vector of probabilities
  newdata <- if (identical(PROB, "")) {
    aggregate(VAR_list, by = GRP_list, FUN = FUNC, na.rm = TRUE)
  } else {
    aggregate(VAR_list, by = GRP_list, FUN = FUNC, na.rm = TRUE, probs = PROB)
  }

  # one label for all rows, also when several probabilities are given
  newdata$FUNCTION <- paste(FUNC, paste(PROB, collapse = ", "))

  # put the label column first
  is_label <- colnames(newdata) == "FUNCTION"
  newdata <- newdata[, c(which(is_label), which(!is_label))]
  names(newdata) <- c("FUNCTION", GOIs, VOIs)
  newdata
}

# Total field area (ha) per manure class and sub-county.
table_manure_class <- aggregator(manure_input_field,
                                 VOIs = "field_size_ha",
                                 GOIs = c("class", "sub_county"),
                                 FUNC = "sum")
table_manure_class[["FUNCTION"]] <- NULL
table_manure_class$class <- as.factor(table_manure_class$class)

# Total measured banana area per sub-county; aggregate() names the summed
# column "x".
total_area <- with(
  table_manure_class,
  aggregate(field_size_ha, by = list(sub_county = sub_county), FUN = sum)
)

# Share of the surveyed area of a sub-county that falls in each class.
# Despite its name, percentage_area_coverage is a fraction between 0 and 1.
table_manure_class <- merge(table_manure_class, total_area, by = "sub_county")
table_manure_class$percentage_area_coverage <-
  with(table_manure_class, field_size_ha / x)
table_manure_class$x <- NULL

# Quick visual check of the area share per class.
ggplot(table_manure_class,
       aes(x = class, y = percentage_area_coverage, fill = sub_county)) +
  geom_bar(stat = "identity", position = "dodge")

rm(total_area)

#______________________________________________________________________________________________________


# Average manure application rate (kg DM/ha/yr) per respondent: the unweighted
# mean over the respondent's fields, ignoring fields without a rate.
percentage_respondents_class <- aggregator(manure_input_field,
                                           VOIs = "cow_dung_ha_DM_yr",
                                           GOIs = c("name", "sub_county"),
                                           FUNC = "mean")
percentage_respondents_class[["FUNCTION"]] <- NULL

# Assign every respondent to a manure use class. The mean is NaN when none of
# the respondent's fields has a rate; these respondents are labelled "nf".
# NOTE(review): this also covers respondents whose rates are missing for
# another reason (e.g. no field size info) - confirm that this is intended.
# The class is stored as a factor with the classes in plotting order.
percentage_respondents_class <- percentage_respondents_class %>%
  dplyr::mutate(
    class = dplyr::case_when(
      is.nan(cow_dung_ha_DM_yr)  ~ "nf",
      cow_dung_ha_DM_yr == 0     ~ "0",
      cow_dung_ha_DM_yr <= 2000  ~ "0-2",
      cow_dung_ha_DM_yr <= 5000  ~ "2-5",
      cow_dung_ha_DM_yr <= 10000 ~ "5-10",
      cow_dung_ha_DM_yr <= 20000 ~ "10-20",
      cow_dung_ha_DM_yr >  20000 ~ "20+"
    ),
    class = factor(class,
                   levels = c("0", "nf", "0-2", "2-5", "5-10", "10-20", "20+"))
  )

# Number of respondents per sub-county (one row per respondent at this point).
resp_per_county <- c(
  Birere  = sum(percentage_respondents_class$sub_county == "Birere",
                na.rm = TRUE),
  Rugaaga = sum(percentage_respondents_class$sub_county == "Rugaaga",
                na.rm = TRUE)
)

# Number of respondents per class and sub-county ...
percentage_respondents_class <- dplyr::count(percentage_respondents_class,
                                             sub_county, class)

# ... and the share of each class within its sub-county. Despite its name,
# percentage_resp is a fraction between 0 and 1. Any sub-county other than
# Birere or Rugaaga gets NA.
percentage_respondents_class$percentage_resp <-
  percentage_respondents_class$n /
  unname(resp_per_county[as.character(percentage_respondents_class$sub_county)])

rm(resp_per_county)

# Quick visual check of the share of respondents per class.
ggplot(percentage_respondents_class,
       aes(x = class, y = percentage_resp, fill = sub_county)) +
  geom_bar(stat = "identity", position = "dodge")

# Merge the two tables: share of the banana area per class
# (table_manure_class) and share of the respondents per class
# (percentage_respondents_class).
# A full outer join (all = TRUE) is needed: a class can occur in only one of
# the two tables and must then show up with a share of 0 in the other one.
# The former argument `all.manure_use_area_resp = T` is not a merge() argument;
# it was silently ignored, which made this an inner join that dropped those
# classes.
manure_use_area_resp <- merge(table_manure_class, percentage_respondents_class,
                              by = c("sub_county", "class"), all = TRUE)
manure_use_area_resp <- manure_use_area_resp[c("sub_county", "class",
                                               "percentage_area_coverage",
                                               "percentage_resp")]

# Long format: one row per sub-county, class and kind of share.
manure_use_area_resp <- reshape2::melt(manure_use_area_resp,
                                       id.vars = c("sub_county", "class"))

# A share that is missing after the outer join means "nothing in this class".
manure_use_area_resp$value[is.na(manure_use_area_resp$value)] <- 0

# Readable legend labels.
manure_use_area_resp$variable <- plyr::revalue(
  manure_use_area_resp$variable,
  c("percentage_area_coverage" = "% of banana area",
    "percentage_resp"          = "% of respondents")
)

# Fractions to percentages.
manure_use_area_resp$value <- manure_use_area_resp$value * 100

# One table per sub-county.
birere_manure_use  <- subset(manure_use_area_resp, sub_county == "Birere")
rugaaga_manure_use <- subset(manure_use_area_resp, sub_county == "Rugaaga")

# Bar charts that show, for each manure use class, the % of respondents and
# the % of the banana area in that class (bars next to each other, grey scale).
plot_birere_manure_use <- ggplot(
  birere_manure_use,
  aes(x = class, y = value, fill = variable)
) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_fill_grey() +
  labs(title = "Birere", x = "Manure application (t DM/ha/year)", y = "") +
  theme(axis.text.x = element_text(angle = 90)) +
  my_figure_theme

plot_rugaaga_manure_use <- ggplot(
  rugaaga_manure_use,
  aes(x = class, y = value, fill = variable)
) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_fill_grey() +
  labs(title = "Rugaaga", x = "Manure application (t DM/ha/year)", y = "") +
  theme(axis.text.x = element_text(angle = 90)) +
  my_figure_theme

# Boxplot of the application rates (t DM/ha/year) per sub-county for the
# fields with a positive rate, after removal of the outliers. The y axis is
# limited to 0-25 with a tick every 5 t.
plot_abs_quant <- ggplot(selection_manure_input_field,
                         aes(x = sub_county, y = cow_dung_ha_ton_DM_yr)) +
  geom_boxplot() +
  scale_fill_grey() +
  scale_y_continuous(limits = c(0, 25), breaks = seq(0, 25, by = 5)) +
  ylab("Manure application\n    (t DM/ha/year)") +
  xlab("") +
  my_figure_theme

# Arrange all plots in one figure with a shared legend.
# ggarrange() is called with its namespace because library(ggpubr) is
# commented out at the top of this script. The third panel label is "C", in
# line with "A" and "B".
manure_quantities <- ggpubr::ggarrange(
  plot_birere_manure_use, plot_rugaaga_manure_use, plot_abs_quant,
  common.legend = TRUE,
  labels = c("A", "B", "C"),
  ncol = 2, nrow = 2
)


# Save the figures as png (default png() size). The directory ./results has to
# exist already.
# 5a
png(filename = "./results/figure_5a.png")
print(plot_birere_manure_use)
dev.off()

# 5b
png(filename = "./results/figure_5b.png")
print(plot_rugaaga_manure_use)
dev.off()

# 5c
png(filename = "./results/figure_5c.png")
print(plot_abs_quant)
dev.off()

# all panels together
png(filename = "./results/figure_5.png")
print(manure_quantities)
dev.off()

# Save the combined figure as eps. ggexport() is called with its namespace
# because library(ggpubr) is commented out at the top of this script.
# NOTE(review): this is written to ./Output, while the png files go to
# ./results - confirm that this is intended.
ggpubr::ggexport(manure_quantities, filename = "./Output/Fig5.eps")

# Clean up the objects that are no longer needed.
remove(all_field_info, plot_abs_quant, plot_birere_manure_use, plot_rugaaga_manure_use,
       my_figure_theme)







