
#'     This script contains the code to analyze some general system
#'     characteristics of the surveyed households, per sub-county
#'     (Birere and Rugaaga):

#     > nr of fields owned by respondents
#     > nr of banana fields owned by respondents
#     > avg banana area cultivated per household (ha)
#     > main crops

require(ggplot2)

#######################################################################################################


#NR OF FIELDS CULTIVATED, AND NUMBER OF BANANA FIELDS PER HOUSEHOLD

# Import the raw survey tables used throughout this script.
#
# NOTE(review): the three files are read from the same directory. The original
# code spelled it "Data/raw" for the field characteristics and "data/raw" for
# the other two, which only works on case-insensitive file systems. The
# lower-case spelling (used twice) is assumed to be the real one -- confirm
# against the repository layout.

# household characteristics; only the sub-county of each respondent is needed
hh_char <- read.csv("./data_analysis/data/raw/A1_hh_characteristics_anom.csv",
                    header = TRUE)
sub_county <- subset(hh_char, select = c("Name", "Sub.county"))

# land_utilization: information on all fields, extended with the sub-county of
# the respondent and an ID that combines respondent name and field ID
land_utilization <- read.csv("./data_analysis/data/raw/A2_land_utilisation_anom.csv",
                             header = TRUE)
land_utilization <- merge(sub_county, land_utilization, by = "Name")
land_utilization$ID <- paste(land_utilization$Name, land_utilization$Field.ID)

# field characteristics; rows flagged "No banana field" are dropped
field_info <- read.csv("./data_analysis/data/raw/B9_field_characteristics_anom.csv",
                       header = TRUE)
field_info <- subset(field_info, field_info$Field.ID != "No banana field")

# Mean and standard deviation of (a) the number of fields cultivated and
# (b) the number of banana fields per household, for each sub-county.
#
# Both statistics are computed from the same per-household counts of distinct
# field IDs. The original code took the means from these counts but the
# standard deviations from the column nr.of.banana.fields: for (a) that is the
# wrong variable, and for (b) it left out the households without a banana
# field although they are part of the mean.

# Number of distinct fields (ID) per household (Name) in `fields`.
#   fields: rows of land_utilization for one sub-county
#   keep:   logical vector, one value per row of `fields`; only rows where it
#           is TRUE are counted (NA counts as FALSE). Households without any
#           counted row are kept with a count of 0.
count_fields_per_hh <- function(fields, keep = rep(TRUE, nrow(fields))) {
  households <- unique(as.character(fields$Name))
  counted <- fields[which(keep), , drop = FALSE]
  ids_by_hh <- split(
    counted$ID,
    factor(as.character(counted$Name), levels = households)
  )
  vapply(ids_by_hh, function(ids) length(unique(ids)), integer(1))
}

#Birere: average number of fields cultivated, standard deviation
all_fields_birere <- subset(land_utilization, land_utilization$Sub.county == "Birere")
fields_per_hh_birere <- count_fields_per_hh(all_fields_birere)
mean(fields_per_hh_birere)
sd(fields_per_hh_birere)

#Rugaaga: average number of fields cultivated, standard deviation
all_fields_rugaaga <- subset(land_utilization, land_utilization$Sub.county == "Rugaaga")
fields_per_hh_rugaaga <- count_fields_per_hh(all_fields_rugaaga)
mean(fields_per_hh_rugaaga)
sd(fields_per_hh_rugaaga)

#Birere: average number of banana fields, standard deviation
all_banana_fields_birere <- subset(land_utilization, land_utilization$Sub.county == "Birere" & land_utilization$main.crop.use == "banana")
banana_fields_per_hh_birere <- count_fields_per_hh(
  all_fields_birere,
  all_fields_birere$main.crop.use == "banana"
)
mean(banana_fields_per_hh_birere)
sd(banana_fields_per_hh_birere)

#Rugaaga: average number of banana fields, standard deviation
all_banana_fields_rugaaga <- subset(land_utilization, land_utilization$Sub.county == "Rugaaga" & land_utilization$main.crop.use == "banana")
banana_fields_per_hh_rugaaga <- count_fields_per_hh(
  all_fields_rugaaga,
  all_fields_rugaaga$main.crop.use == "banana"
)
mean(banana_fields_per_hh_rugaaga)
sd(banana_fields_per_hh_rugaaga)

# remove the helper and the intermediate count vectors
remove(count_fields_per_hh,
       fields_per_hh_birere, fields_per_hh_rugaaga,
       banana_fields_per_hh_birere, banana_fields_per_hh_rugaaga)

##################################################################################### 



# SIZE OF BANANA AREA CULTIVATED


#prepare dataframe
# Keep the first five columns of field_info (selected by position; they are
# expected to include Name and Field.size, both of which are used below).
hh_banana_area <- subset(field_info, select = c(1:5))
colnames(hh_banana_area)[colnames(hh_banana_area) == "Field.size"] <- "field_size_acre"

# Field sizes may have been read as text/factor: convert via character so that
# the values, not the factor codes, are used.
hh_banana_area$field_size_acre <- as.numeric(as.character(hh_banana_area$field_size_acre))

# Drop fields without a usable size AFTER the conversion, so that entries that
# are not numbers (turned into NA above) are removed together with the missing
# ones instead of being counted as 0 ha when the areas are summed later.
hh_banana_area <- hh_banana_area[!is.na(hh_banana_area$field_size_acre), ]

# acre -> hectare (1 acre = 0.40468564 ha)
hh_banana_area$field_ha <- hh_banana_area$field_size_acre * 0.40468564

#' Aggregate one or more variables by one or more grouping columns
#'
#' Wrapper around stats::aggregate() that takes column names as strings and
#' returns a data frame labelled with the summary function that was applied.
#'
#' @param data  data frame (or list) holding the columns named in VOIs and GOIs
#' @param VOIs  character vector: names of the columns to summarise
#' @param GOIs  character vector: names of the columns to group by
#' @param FUNC  name of the summary function, as a string (default 'mean').
#'              It is called with na.rm = TRUE, so it must accept that argument.
#' @param PROB  optional probabilities, passed to FUNC as `probs`
#'              (e.g. for 'quantile'). "" (default) or NULL: not passed.
#' @return data frame with the column FUNCTION (FUNC and PROB pasted together),
#'         followed by the grouping columns (GOIs) and the summarised
#'         variables (VOIs).
aggregator <- function(data, VOIs, GOIs, FUNC='mean', PROB=""){
  # fail early, with a readable message, when a requested column is absent
  missing_cols <- setdiff(c(GOIs, VOIs), names(data))
  if (length(missing_cols) > 0) {
    stop("column(s) not found in data: ",
         paste(missing_cols, collapse = ", "), call. = FALSE)
  }

  # unnamed lists of the grouping and value columns (safe for empty input,
  # unlike 1:length(x))
  GRP_list <- unname(lapply(GOIs, function(col) data[[col]]))
  VAR_list <- unname(lapply(VOIs, function(col) data[[col]]))

  # PROB may be a vector of probabilities, so it cannot be compared with ==
  # inside if()
  no_prob <- is.null(PROB) || identical(PROB, "")
  if (no_prob) {
    newdata <- aggregate(VAR_list, by = GRP_list, FUN = FUNC, na.rm = TRUE)
  } else {
    newdata <- aggregate(VAR_list, by = GRP_list, FUN = FUNC, na.rm = TRUE,
                         probs = PROB)
  }

  # one label per row, e.g. "sum " or "quantile 0.5"; several probabilities are
  # collapsed into a single label
  label <- paste(FUNC, paste(PROB, collapse = ", "))
  newdata$FUNCTION <- rep(label, nrow(newdata))

  # FUNCTION first, then groups, then variables
  newdata <- newdata[, c("FUNCTION", setdiff(colnames(newdata), "FUNCTION"))]
  names(newdata) <- c("FUNCTION", GOIs, VOIs)
  newdata
}

# Total banana area per household: sum field_ha over all rows that share the
# same Name, then attach the sub-county of each household.
hh_banana_area <- aggregator(data = hh_banana_area, VOIs = "field_ha",
                             GOIs = "Name", FUNC = 'sum')
hh_banana_area <- merge(x = hh_banana_area, y = sub_county, by = "Name")

# Birere: mean and standard deviation of the banana area per household (ha)
a <- hh_banana_area[which(hh_banana_area$Sub.county == "Birere"), ]
mean(a[["field_ha"]])
sd(a[["field_ha"]])

# Rugaaga: mean and standard deviation of the banana area per household (ha)
b <- hh_banana_area[which(hh_banana_area$Sub.county == "Rugaaga"), ]
mean(b[["field_ha"]])
sd(b[["field_ha"]])

#################################################################################################





################################################################################################

#make figure with ordered bar-charts for banana area cultivated per household
#
# Both panels use coord_cartesian(ylim = ...) so that they share the same
# visible y-range (0 - 2.5 ha). The Birere panel originally used ylim(), which
# does not zoom but removes every household whose bar exceeds the limit.

# NOTE(review): par(mfrow) only affects base graphics and has no effect on the
# ggplot objects below; it is kept so the graphics state left behind by this
# script is unchanged.
par(mfrow = c(1, 2))

#Birere: ordered bar chart with banana area
banana_area_birere <- subset(hh_banana_area, hh_banana_area$Sub.county == "Birere", select = c("Name", "field_ha"))

# households on the x-axis, ordered by increasing banana area; names are hidden
plot_banana_area_birere <- ggplot(banana_area_birere, aes(x = reorder(Name, field_ha), y = field_ha)) +
  geom_bar(stat = "identity") +
  xlab("Birere") +
  coord_cartesian(ylim = c(0, 2.5)) +
  ylab("banana area cultivated (ha/household)") +
  theme(axis.text.x = element_blank())

#Rugaaga: ordered bar chart with banana area
banana_area_rugaaga <- subset(hh_banana_area, hh_banana_area$Sub.county == "Rugaaga", select = c("Name", "field_ha"))

# same layout as the Birere panel; the y-axis title is left empty because the
# panel is placed to the right of the Birere panel
plot_banana_area_rugaaga <- ggplot(banana_area_rugaaga, aes(x = reorder(Name, field_ha), y = field_ha)) +
  geom_bar(stat = "identity") +
  xlab("Rugaaga") +
  coord_cartesian(ylim = c(0, 2.5)) +
  ylab("") +
  theme(axis.text.x = element_blank())

# NOTE(review): ggarrange() is not provided by ggplot2, the only package
# loaded in the visible part of this script; presumably it comes from ggpubr
# -- confirm that the package is attached before this line runs.
ggarrange(plot_banana_area_birere, plot_banana_area_rugaaga)

#remove unneeded files
remove(plot_banana_area_birere, plot_banana_area_rugaaga)


