#Section 1: Create clean R workspace ----
# Remove every object from the current environment.
rm(list = ls())
# Shut down all open graphics devices.
graphics.off()
# Run the garbage collector without the verbose memory report.
gc(verbose = FALSE)
# Emit a form-feed character (presumably to clear the RStudio console).
cat("\014")
# Set the LANG environment variable to English.
Sys.setenv(LANG = "en")
# Raise the cap on the number of entries a single print may show.
options(max.print = 20000)

#Section 2: Open libraries and datasets ----
#Open packages:
Pkgs2Load <- c(
  "readxl", "tidyr", "ggplot2", "ggh4x", "emmeans",
  "multcomp", "glmmTMB", "DHARMa", "vegan"
)

# Attach the packages one by one; library() stops with an error when a
# package is not installed. invisible() hides the list returned by lapply().
invisible(lapply(
  Pkgs2Load,
  function(pkg) library(pkg, character.only = TRUE)
))

#Set the working directory.
# NOTE(review): machine-specific absolute path; the relative file names below
# are resolved against this folder.
setwd("C:/Users/luukc/OneDrive - Wageningen University & Research/General/Projects/Collab Luuk Fogelina Carabid beetles/Data analyses/Luuk")

#Import data files
# Excel workbooks are read with readxl, the ERF files with read.csv2().
Broek <- read_xlsx(path = "Broekemahoeve_all.xlsx")

Droef <- read_xlsx(path = "Droevendaal_2019-2022.xlsx")

Erf20 <- read.csv2(file = "ERF Gz5 2020_def.csv")
Erf21 <- read.csv2(file = "ERF Gz5 20212022_def.csv")

Val <- read_xlsx(path = "Valthermond_all.xlsx")

#Section 3: Prepare the data ----
# Names of the Valthermond metadata columns that are converted to factors in
# subsection 3.4.3.
cols <- c("year","field","crop","treatment")

#Subsection 3.1: Broekemahoeve ----
#Subsection 3.1.1: Remove samples that were not done or not relevant
Broek <- Broek[Broek$Done != 0, ]

# Year and month of collection as text, plus a constant site label.
# The order in which these columns are appended matters: later steps select
# columns by position.
Broek$Year <- format(Broek$Date_collection, format = "%Y")
Broek$Month <- format(Broek$Date_collection, format = "%m")
Broek$Location <- "Lelystad"

# Year-specific selection: two treatments in 2019, two fields in 2020 and one
# field in 2021. Wheat is removed afterwards.
Broek <- Broek[with(
  Broek,
  (Year == "2019" & (Treatment == "REF_SPACE" | Treatment == "STRIP")) |
    (Year == "2020" & (Field == "J9_1" | Field == "J10_7")) |
    (Year == "2021" & Field == "J8_1")
), ]
Broek <- Broek[Broek$Crop != "Wheat", ]


#Subsection 3.1.2: Aggregate rounds
# Key that combines year and ID; columns 18 to 72 are summed per key.
Broek$yearserienr <- paste(Broek$Year, Broek$ID, sep = ".")
BroekY <- aggregate(
  Broek[, 18:72],
  by = list(yearserienr = Broek$yearserienr),
  FUN = sum
)

#Subsection 3.1.3: Add meta-data
# Metadata is taken from the July rows and joined to the sums on the key.
BroekR1 <- Broek[Broek$Month == "07", ]
BroekY <- merge(
  BroekR1[, c(1:5, 7, 10, 12, 74, 76)],
  BroekY,
  by = "yearserienr"
)

#Subsection 3.1.4: Remove incomplete year series
BroekY <- BroekY[BroekY$ID != "PF-B-031", ]

#Subsection 3.1.5: Add genus totals
# Each genus column is the sum of the listed species columns. The list order
# fixes the order in which the genus columns are appended to BroekY.
broek_genera <- list(
  Amar = c("AmarAene", "AmarAuli", "AmarBifr", "AmarComm", "AmarCons",
           "AmarFami", "AmarFulv", "AmarOvat", "AmarSpre", "AmarTibi"),
  Anch = "AnchDors",
  Bemb = c("BembAene", "BembLamp", "BembIric", "BembProp", "BembFemo",
           "BembQuadrim", "BembTetr"),
  Blem = "BlemDisc",
  Cala = c("CalaCinc", "CalaErra", "CalaMela"),
  Cliv = c("ClivFoss", "ClivColl"),
  Harp = c("HarpAffi", "HarpGris", "HarpRufi", "HarpRubr", "HarpTard",
           "HarpFlav", "HarpDist", "HarpSign"),
  Lori = "LoriPili",
  Nebr = c("NebrSali", "NebrBrev"),
  Poec = c("PoecCupr", "PoecVers"),
  Pter = c("PterMela", "PterNige", "PterVern"),
  Trec = c("TrecObtu", "TrecQuad")
)
for (genus in names(broek_genera)) {
  # Element-wise sum over the species columns of this genus.
  BroekY[[genus]] <- Reduce(`+`, BroekY[broek_genera[[genus]]])
}


#Subsection 3.2: Droevendaal ----
#Subsection 3.2.1: Remove samples that were not done or not relevant
Droef <- Droef[Droef$Done != 0, ]

# Year and month of collection as text, plus a constant site label.
# The order in which these columns are appended matters: later steps select
# columns by position.
Droef$Year <- format(Droef$Date_collection, format = "%Y")
Droef$Month <- format(Droef$Date_collection, format = "%m")
Droef$Location <- "Wageningen"

# Keep the two treatments of interest, then a year-specific set of fields
# and crops.
Droef <- Droef[Droef$Treatment == "REF_SPACE" | Droef$Treatment == "STRIP", ]
Droef <- Droef[with(
  Droef,
  (Year == "2019" & (Field == "10" | Crop == "Potato")) |
    (Year == "2020" & (Field == "10" | Field == "13" | Crop == "Pumpkin")) |
    (Year == "2021" & (Field == "4" | Field == "5" | Field == "6")) |
    (Year == "2022" & (Field == "4" | Field == "5" | Field == "6"))
), ]

#Subsection 3.2.2: Change data format
# Columns 1-4 and 7-14 become factors; column 5 and columns 20-81 become
# numeric.
Droef[, c(1:4, 7:14)] <- lapply(Droef[, c(1:4, 7:14)], as.factor)
Droef[, c(5, 20:81)] <- lapply(Droef[, c(5, 20:81)], as.numeric)

#Subsection 3.2.3: Remove incomplete rounds of wheat in 2020
# Non-wheat rows are always kept. Wheat rows are kept for 2019, 2021 and
# 2022, and for 2020 only in rounds 2, 3 and 6.
Droef <- Droef[with(
  Droef,
  Crop != "Wheat" |
    (Crop == "Wheat" & (
      Year == "2019" |
        (Year == "2020" & (Round == 2 | Round == 3 | Round == 6)) |
        Year == "2021" |
        Year == "2022"
    ))
), ]

#Subsection 3.2.4: Aggregate rounds
# Sum columns 20 to 81 per ID.
DroefY <- aggregate(Droef[, 20:81], by = list(ID = Droef$ID), FUN = sum)

#Subsection 3.2.5: Add meta-data
# Metadata comes from one reference round per year: round 2 in 2019 and 2022,
# round 4 in 2020 and 2021, and additionally round 3 for wheat in 2020.
DroefR1 <- Droef[with(
  Droef,
  ((Year == "2019" | Year == "2022") & Round == "2") |
    ((Year == "2020" | Year == "2021") & Round == "4") |
    (Year == "2020" & Crop == "Wheat" & Round == "3")
), ]
DroefY <- merge(DroefR1[, c(1:5, 7, 10, 12, 83)], DroefY, by = "ID")

#Subsection 3.2.6: Remove incomplete year series
DroefY <- DroefY[!(DroefY$ID %in% c("PF-D-364", "PF-D-768")), ]

#Subsection 3.2.7: Add genus totals
# Each genus column is the sum of the listed species columns. The list order
# fixes the order in which the genus columns are appended to DroefY.
droef_genera <- list(
  Amar = c("AmarAene", "AmarAuli", "AmarApri", "AmarBifr", "AmarComm",
           "AmarCons", "AmarFame", "AmarFami", "AmarFulv", "AmarOvat",
           "AmarSimi", "AmarSpre", "AmarTibi"),
  Anch = "AnchDors",
  Bemb = c("BembAene", "BembLamp", "BembIric", "BembProp", "BembFemo",
           "BembQuadrim", "BembTetr"),
  Blem = "BlemDisc",
  Cala = c("CalaCinc", "CalaErra", "CalaMela", "CalaRotu"),
  Cliv = c("ClivFoss", "ClivColl"),
  Harp = c("HarpAffi", "HarpGris", "HarpRufi", "HarpRubr", "HarpTard",
           "HarpFlav", "HarpDist", "HarpSign"),
  Lori = "LoriPili",
  Nebr = c("NebrSali", "NebrBrev"),
  Poec = c("PoecCupr", "PoecVers"),
  Pter = c("PterMela", "PterNige", "PterVern"),
  Trec = c("TrecObtu", "TrecQuad")
)
for (genus in names(droef_genera)) {
  # Element-wise sum over the species columns of this genus.
  DroefY[[genus]] <- Reduce(`+`, DroefY[droef_genera[[genus]]])
}


#Subsection 3.3: ERF ----
#Subsection 3.3.1: Aggregate rounds
# 2020 file: sum columns 17 to 29 per ID.
Erf20Y <- aggregate(Erf20[, 17:29], by = list(ID = Erf20$ID), FUN = sum)

# 2021/2022 file: sum columns 17 to 60 per combination of year and ID.
Erf21$yearserienr <- paste(Erf21$Year, Erf21$ID, sep = ".")
Erf21Y <- aggregate(
  Erf21[, 17:60],
  by = list(yearserienr = Erf21$yearserienr),
  FUN = sum
)

#Subsection 3.3.2: Add meta-data
# Metadata is taken from round 1 (2020 file) and round 2 (2021/2022 file).
Erf20R1 <- Erf20[Erf20$Round == "1", ]
Erf20Y <- merge(Erf20R1[, c(1, 8:13)], Erf20Y, by = "ID")

Erf21R1 <- Erf21[Erf21$Round == "2", ]
Erf21Y <- merge(Erf21R1[, c(1, 8:13, 114)], Erf21Y, by = "yearserienr")

#Subsection 3.3.3: Remove incomplete year series
Erf20Y <- Erf20Y[Erf20Y$ID != "13", ]

erf21_incomplete <- c(
  "2021.17",
  "2022.13", "2022.20", "2022.24", "2022.29", "2022.31", "2022.35", "2022.40"
)
Erf21Y <- Erf21Y[!(Erf21Y$yearserienr %in% erf21_incomplete), ]

#Subsection 3.3.4: Add genus totals
# 2020 file: counts are already stored per genus, so the columns are copied
# under the short names used by the other datasets. Cala and Lori have no
# source column here and are filled with 0.
# NOTE(review): "Harpulus" is the column name as referenced in this script,
# presumably the spelling used in the CSV header for Harpalus - verify.
Erf20Y$Amar <- Erf20Y$Amara
Erf20Y$Anch <- Erf20Y$Anchomenus
Erf20Y$Bemb <- Erf20Y$Bembidion
Erf20Y$Blem <- Erf20Y$Blemus
Erf20Y$Cala <- 0
Erf20Y$Cliv <- Erf20Y$Clivina
Erf20Y$Harp <- Erf20Y$Harpulus
Erf20Y$Lori <- 0
Erf20Y$Nebr <- Erf20Y$Nebria
Erf20Y$Poec <- Erf20Y$Poecilus
Erf20Y$Pter <- Erf20Y$Pterostichus
Erf20Y$Trec <- Erf20Y$Trechus

# 2021/2022 file: each genus column is the sum of the listed species columns.
# An empty entry means no source column is used and the genus is set to 0.
# The list order fixes the order in which the columns are appended.
erf21_genera <- list(
  Amar = c("AmarAene", "AmarAuli", "AmarBifr", "AmarFami", "AmarPleb",
           "AmarSimi", "AmarUI"),
  Anch = "AnchDors",
  Bemb = c("BembBigu", "BembLamp", "BembLunu", "BembProp", "BembObtu",
           "BembQuadrim", "BembTetr", "BembUI"),
  Blem = "BlemDisc",
  Cala = character(0),
  Cliv = "ClivFoss",
  Harp = c("HarpAffi", "HarpGris", "HarpRufi"),
  Lori = "LoriPili",
  Nebr = "NebrBrev",
  Poec = c("PoecCupr", "PoecVers"),
  Pter = c("PterAnth", "PterMela", "PterNige", "PterStren", "PterVern"),
  Trec = c("TrecObtu", "TrecQuad", "TrecUI")
)
for (genus in names(erf21_genera)) {
  species <- erf21_genera[[genus]]
  Erf21Y[[genus]] <- if (length(species) > 0) {
    Reduce(`+`, Erf21Y[species])
  } else {
    0
  }
}


#Subsection 3.4: Valthermond ----
#Subsection 3.4.1: Remove samples that were not done or not relevant
# Only rows whose sampling method is "potval" are kept.
Val <- Val[Val$sampling_method == "potval", ]

#Subsection 3.4.2: Create variables and remove irrelevant variables
# Eight metadata columns followed by 25 count columns, in this order.
val_meta <- c("year", "round", "trapnr", "field", "crop", "treatment",
              "id", "xpos")
val_counts <- c(
  "Anchomenus_dorsalis", "Bembidion", "Clivina_collaris", "Clivina_fossor",
  "Amara_anthobia", "Amara_ovata", "Amara_fulva", "Amara_aenae",
  "Amara_bifrons",
  "Agonum_meulleri", "Harpalus_onbekend", "Harpalus_tardus", "Harpalus_ruf",
  "Harpalus_affinis", "Harpalus_griseus",
  "Poecilus", "Pterostichus_onbekend", "Pterostichus_mel",
  "Pterostichus_niger",
  "Calathus_erratus", "Calathus_cinctus", "Calathus_fuscipes",
  "Calathus_melanocephalus",
  "Trechus_quadristriatus", "Loricera_pilicornis"
)
Val2 <- Val[, c(val_meta, val_counts)]


#Subsection 3.4.3: Create year series
# Sum the count columns per combination of year and trap number.
Val2$yearserienr <- paste(Val2$year, Val2$trapnr, sep = ".")
ValY <- aggregate(
  Val2[, val_counts],
  by = list(yearserienr = Val2$yearserienr),
  FUN = sum
)

# Metadata is taken from round 1 and joined to the sums on the key; the
# columns named in `cols` are then converted to factors.
ValR1 <- Val2[Val2$round == "1", ]
ValY <- merge(
  ValR1[, c("year", "trapnr", "field", "crop", "treatment", "yearserienr")],
  ValY,
  by = "yearserienr"
)
ValY[cols] <- lapply(ValY[cols], factor)


#Subsection 3.4.4: Translate crop names from Dutch to English
# Crop values that are not among the four listed levels become NA.
ValY$crop <- factor(
  ValY$crop,
  levels = c("gerst", "gerst/boon", "aardappel", "grasklaver"),
  labels = c("barley", "barley/bean", "potato", "grass/clover")
)

#Subsection 3.4.5: Add genus totals
# Each genus column is the sum of the listed count columns. An empty entry
# means no source column is used and the genus is set to 0. The list order
# fixes the order in which the columns are appended to ValY.
val_genera <- list(
  Amar = c("Amara_aenae", "Amara_anthobia", "Amara_bifrons", "Amara_fulva",
           "Amara_ovata"),
  Anch = "Anchomenus_dorsalis",
  Bemb = "Bembidion",
  Blem = character(0),
  Cala = c("Calathus_cinctus", "Calathus_erratus", "Calathus_fuscipes",
           "Calathus_melanocephalus"),
  Cliv = c("Clivina_collaris", "Clivina_fossor"),
  Harp = c("Harpalus_affinis", "Harpalus_griseus", "Harpalus_onbekend",
           "Harpalus_ruf", "Harpalus_tardus"),
  Lori = "Loricera_pilicornis",
  Nebr = character(0),
  Poec = "Poecilus",
  Pter = c("Pterostichus_mel", "Pterostichus_niger", "Pterostichus_onbekend"),
  Trec = "Trechus_quadristriatus"
)
for (genus in names(val_genera)) {
  species <- val_genera[[genus]]
  ValY[[genus]] <- if (length(species) > 0) {
    Reduce(`+`, ValY[species])
  } else {
    0
  }
}


#Subsection 3.5: Merge datasets ----
#Subsection 3.5.1: Change column names
# Give the columns that are stacked below the same names in every dataset
# and add the site label where it is still missing.
names(Erf20Y)[4] <- "Treatment"
Erf20Y$Location <- "Almere"

names(Erf21Y)[4] <- "Treatment"
Erf21Y$Location <- "Almere"

names(ValY)[c(2, 3, 5, 6)] <- c("Year", "ID", "Crop", "Treatment")
ValY$Location <- "Valthermond"


#Subsection 3.5.2: Bind datasets
# Seventeen columns are selected by position from each dataset and stacked.
Total <- rbind(
  BroekY[, c(2, 3, 8:10, 66:77)],
  DroefY[, c(1, 2, 7:9, 72:83)],
  Erf20Y[, c(1:4, 21:33)],
  Erf21Y[, c(2:4, 8, 53:65)],
  ValY[, c(2, 3, 5, 6, 32:44)]
)

#Subsection 3.5.3: Make treatment names consistent
# The five spellings collapse to two levels; any other value becomes NA.
Total$Treatment <- factor(
  Total$Treatment,
  levels = c("mono", "REF_SPACE", "strip", "STRIP", "STRIP_3"),
  labels = c("Monoculture", "Monoculture", "Strip", "Strip", "Strip")
)

# Bring the lower-case crop names in line with the capitalised ones.
crop_recode <- c(
  "barley" = "Barley",
  "barley/bean" = "Barley/Beans",
  "grass/clover" = "Grass/Clover",
  "potato" = "Potato"
)
needs_recode <- Total$Crop %in% names(crop_recode)
Total$Crop[needs_recode] <- unname(crop_recode[Total$Crop[needs_recode]])

# Labels equal the levels, except that "/" is replaced by a line break.
crop_levels <- c(
  "Barley", "Barley/Beans", "Beans", "Broccoli", "Cabbage", "Celeriac",
  "Grass", "Grass/Clover", "Oat", "Onion", "Parsnip", "Potato", "Pumpkin",
  "Wheat"
)
Total$Crop <- factor(
  Total$Crop,
  levels = crop_levels,
  labels = sub("/", "\n", crop_levels, fixed = TRUE)
)


#Subsection 3.5.4: Make location-year interaction variable
Total$LocYear <- paste(Total$Location, Total$Year, sep = "_")
Total$LocYearCrop <- paste(Total$LocYear, Total$Crop, sep = "_")

#Section 4: Abundance / Activity density ----
#Subsection 4.1: Amara (none found at Lelystad)
# nbinom2 mixed model of the Amara total on treatment, location and their
# interaction, with random intercepts for year and for crop within year.
# Lelystad rows are left out; na.fail stops the fit on missing values.
TMAm <- glmmTMB(
  formula = Amar ~ 1 + Treatment * Location + (1 | Year / Crop),
  data = Total[Total$Location != "Lelystad", ],
  family = nbinom2(),
  ziformula = ~ 0,
  dispformula = ~ 1,
  na.action = na.fail
)
# Pairwise treatment comparisons within each location (response scale) and
# a compact letter display of the estimated means.
HSD.TMAm <- emmeans(
  TMAm,
  specs = pairwise ~ Treatment | Location,
  type = "response"
)
CLD.TMAm <- cld(HSD.TMAm$emmeans, Letters = letters)

#Subsection 4.2: Anchomenus (only 1 found at Lelystad)
# nbinom2 mixed model of the Anchomenus total on treatment, location and
# their interaction, with random intercepts for year and for crop within
# year. na.fail stops the fit on missing values.
# NOTE(review): the data filter keeps Almere and Valthermond only, so
# Wageningen is excluded as well as Lelystad - confirm this is intended.
TMAn <- glmmTMB(Anch ~ 1 + Treatment*Location +
                  (1|Year/Crop), 
                family = nbinom2(),
                dispformula = ~ 1,
                ziformula =~ 0,
                data = Total[Total$Location == "Almere" | Total$Location == "Valthermond",], na.action = na.fail)
# Pairwise treatment comparisons within each location. type = "response" is
# passed here as in every other model of this section, so that the estimated
# means are reported on the count scale instead of the log link scale.
HSD.TMAn <- emmeans(TMAn, pairwise ~ Treatment|Location, type = "response")
# Compact letter display of the estimated means.
CLD.TMAn <- cld(HSD.TMAn$emmeans, Letters = letters)

#Subsection 4.3: Bembidion
# nbinom2 mixed model of the Bembidion total on treatment, location and their
# interaction, with random intercepts for year and for crop within year.
# All locations are used; na.fail stops the fit on missing values.
TMBe <- glmmTMB(
  formula = Bemb ~ 1 + Treatment * Location + (1 | Year / Crop),
  data = Total,
  family = nbinom2(),
  ziformula = ~ 0,
  dispformula = ~ 1,
  na.action = na.fail
)
# Pairwise treatment comparisons within each location (response scale) and
# a compact letter display of the estimated means.
HSD.TMBe <- emmeans(
  TMBe,
  specs = pairwise ~ Treatment | Location,
  type = "response"
)
CLD.TMBe <- cld(HSD.TMBe$emmeans, Letters = letters)

#Subsection 4.4: Blemus (none found at Valthermond and Wageningen)
# nbinom2 mixed model of the Blemus total on treatment, location and their
# interaction, with random intercepts for year and for crop within year.
# Only Almere and Lelystad rows are used; na.fail stops the fit on missing
# values.
TMBl <- glmmTMB(
  formula = Blem ~ 1 + Treatment * Location + (1 | Year / Crop),
  data = Total[Total$Location %in% c("Almere", "Lelystad"), ],
  family = nbinom2(),
  ziformula = ~ 0,
  dispformula = ~ 1,
  na.action = na.fail
)
# Pairwise treatment comparisons within each location (response scale) and
# a compact letter display of the estimated means.
HSD.TMBl <- emmeans(
  TMBl,
  specs = pairwise ~ Treatment | Location,
  type = "response"
)
CLD.TMBl <- cld(HSD.TMBl$emmeans, Letters = letters)

#Subsection 4.5: Calathus (none found at Almere and Lelystad)
# nbinom2 mixed model of the Calathus total on treatment, location and their
# interaction, with random intercepts for year and for crop within year.
# Only Valthermond and Wageningen rows are used; na.fail stops the fit on
# missing values.
TMCa <- glmmTMB(
  formula = Cala ~ 1 + Treatment * Location + (1 | Year / Crop),
  data = Total[Total$Location %in% c("Valthermond", "Wageningen"), ],
  family = nbinom2(),
  ziformula = ~ 0,
  dispformula = ~ 1,
  na.action = na.fail
)
# Pairwise treatment comparisons within each location (response scale) and
# a compact letter display of the estimated means.
HSD.TMCa <- emmeans(
  TMCa,
  specs = pairwise ~ Treatment | Location,
  type = "response"
)
CLD.TMCa <- cld(HSD.TMCa$emmeans, Letters = letters)

#Subsection 4.6: Clivina
# nbinom2 mixed model of the Clivina total on treatment, location and their
# interaction, with random intercepts for year and for crop within year.
# All locations are used; na.fail stops the fit on missing values.
TMCl <- glmmTMB(
  formula = Cliv ~ 1 + Treatment * Location + (1 | Year / Crop),
  data = Total,
  family = nbinom2(),
  ziformula = ~ 0,
  dispformula = ~ 1,
  na.action = na.fail
)
# Pairwise treatment comparisons within each location (response scale) and
# a compact letter display of the estimated means.
HSD.TMCl <- emmeans(
  TMCl,
  specs = pairwise ~ Treatment | Location,
  type = "response"
)
CLD.TMCl <- cld(HSD.TMCl$emmeans, Letters = letters)

#Subsection 4.7: Harpalus
# nbinom2 mixed model of the Harpalus total on treatment, location and their
# interaction, with random intercepts for year and for crop within year.
# All locations are used; na.fail stops the fit on missing values.
TMHa <- glmmTMB(
  formula = Harp ~ 1 + Treatment * Location + (1 | Year / Crop),
  data = Total,
  family = nbinom2(),
  ziformula = ~ 0,
  dispformula = ~ 1,
  na.action = na.fail
)
# Pairwise treatment comparisons within each location (response scale) and
# a compact letter display of the estimated means.
HSD.TMHa <- emmeans(
  TMHa,
  specs = pairwise ~ Treatment | Location,
  type = "response"
)
CLD.TMHa <- cld(HSD.TMHa$emmeans, Letters = letters)

#Subsection 4.8: Loricera
# nbinom2 mixed model of the Loricera total on treatment, location and their
# interaction, with random intercepts for year and for crop within year.
# All locations are used; na.fail stops the fit on missing values.
TMLo <- glmmTMB(
  formula = Lori ~ 1 + Treatment * Location + (1 | Year / Crop),
  data = Total,
  family = nbinom2(),
  ziformula = ~ 0,
  dispformula = ~ 1,
  na.action = na.fail
)
# Pairwise treatment comparisons within each location (response scale) and
# a compact letter display of the estimated means.
HSD.TMLo <- emmeans(
  TMLo,
  specs = pairwise ~ Treatment | Location,
  type = "response"
)
CLD.TMLo <- cld(HSD.TMLo$emmeans, Letters = letters)

#Subsection 4.9: Nebria (only 2 found in Lelystad, none in Valthermond)
# nbinom2 mixed model of the Nebria total on treatment, location and their
# interaction, with random intercepts for year and for crop within year.
# Only Almere and Wageningen rows are used; na.fail stops the fit on missing
# values.
TMNe <- glmmTMB(
  formula = Nebr ~ 1 + Treatment * Location + (1 | Year / Crop),
  data = Total[Total$Location %in% c("Almere", "Wageningen"), ],
  family = nbinom2(),
  ziformula = ~ 0,
  dispformula = ~ 1,
  na.action = na.fail
)
# Pairwise treatment comparisons within each location (response scale) and
# a compact letter display of the estimated means.
HSD.TMNe <- emmeans(
  TMNe,
  specs = pairwise ~ Treatment | Location,
  type = "response"
)
CLD.TMNe <- cld(HSD.TMNe$emmeans, Letters = letters)

#Subsection 4.10: Poecilus
# nbinom2 mixed model of the Poecilus total on treatment, location and their
# interaction, with random intercepts for year and for crop within year.
# All locations are used; na.fail stops the fit on missing values.
TMPo <- glmmTMB(
  formula = Poec ~ 1 + Treatment * Location + (1 | Year / Crop),
  data = Total,
  family = nbinom2(),
  ziformula = ~ 0,
  dispformula = ~ 1,
  na.action = na.fail
)
# Pairwise treatment comparisons within each location (response scale) and
# a compact letter display of the estimated means.
HSD.TMPo <- emmeans(
  TMPo,
  specs = pairwise ~ Treatment | Location,
  type = "response"
)
CLD.TMPo <- cld(HSD.TMPo$emmeans, Letters = letters)

#Subsection 4.11: Pterostichus
#Excluding Almere 2020, because of much higher values.
# nbinom2 mixed model of the Pterostichus total on treatment, location and
# their interaction, fitted to every row that is not Almere 2020.
# NOTE(review): the random part is (1 | Crop) here, whereas the other
# multi-year models in this section use (1 | Year / Crop) - confirm that
# leaving out Year is deliberate.
TMPt1 <- glmmTMB(
  formula = Pter ~ 1 + Treatment * Location + (1 | Crop),
  data = Total[Total$Location != "Almere" | Total$Year != "2020", ],
  family = nbinom2(),
  ziformula = ~ 0,
  dispformula = ~ 1,
  na.action = na.fail
)
# Pairwise treatment comparisons within each location (response scale) and
# a compact letter display of the estimated means.
HSD.TMPt1 <- emmeans(
  TMPt1,
  specs = pairwise ~ Treatment | Location,
  type = "response"
)
CLD.TMPt1 <- cld(HSD.TMPt1$emmeans, Letters = letters)

#Only Almere 2020.
# Same response and family for the Almere 2020 rows alone: treatment is the
# only fixed effect and crop the only random intercept.
TMPt2 <- glmmTMB(
  formula = Pter ~ 1 + Treatment + (1 | Crop),
  data = Total[Total$Location == "Almere" & Total$Year == "2020", ],
  family = nbinom2(),
  ziformula = ~ 0,
  dispformula = ~ 1,
  na.action = na.fail
)
# Pairwise treatment comparisons (response scale) and a compact letter
# display of the estimated means.
HSD.TMPt2 <- emmeans(TMPt2, specs = pairwise ~ Treatment, type = "response")
CLD.TMPt2 <- cld(HSD.TMPt2$emmeans, Letters = letters)

#Subsection 4.12: Trechus (none found in Valthermond)
# nbinom2 mixed model of the Trechus total on treatment, location and their
# interaction, with random intercepts for year and for crop within year.
# Valthermond rows are left out; na.fail stops the fit on missing values.
TMTr <- glmmTMB(
  formula = Trec ~ 1 + Treatment * Location + (1 | Year / Crop),
  data = Total[Total$Location != "Valthermond", ],
  family = nbinom2(),
  ziformula = ~ 0,
  dispformula = ~ 1,
  na.action = na.fail
)
# Pairwise treatment comparisons within each location (response scale) and
# a compact letter display of the estimated means.
HSD.TMTr <- emmeans(
  TMTr,
  specs = pairwise ~ Treatment | Location,
  type = "response"
)
CLD.TMTr <- cld(HSD.TMTr$emmeans, Letters = letters)
