# Niccolo` Bassetti	- niccolo.bassetti@protonmail.com

# Session set-up
# The workspace is deliberately not cleared and the working directory is not
# changed here: rm(list = ls()) would wipe the caller's objects, and setwd("")
# raises "cannot change working directory". Run the script from the folder that
# holds the input files, or uncomment and edit the next line.
# setwd("path/to/project")
sessionInfo() # R version and the packages currently attached

# Show the R library folder locations
.libPaths()
Sys.getenv("R_LIBS_USER") # user library, normally also listed by .libPaths()

# Install R packages only when they are missing, so re-running the script does
# not rebuild them from source every time.
devtools_version <- "1.13.2"
packageurl <- "https://cran.r-project.org/src/contrib/Archive/devtools/devtools_1.13.2.tar.gz" # URL of the archived package version
if (!requireNamespace("devtools", quietly = TRUE) ||
    utils::packageVersion("devtools") != devtools_version) {
  install.packages(packageurl, repos = NULL, type = "source") # install the pinned old version
}
if (!requireNamespace("glue", quietly = TRUE)) {
  install.packages("glue")
}

### Attach the libraries and read the data
packages <- c("tidyverse", "binom", "dplyr", "RColorBrewer")
lapply(packages, library, character.only = TRUE)

# Whitespace-separated table with a header row, read from the working directory
mydata <- read.table(file = "figure_S2.txt", header = TRUE, sep = "")
head(mydata)
names(mydata)

dim(mydata)
str(mydata)

# Show the class of every column, then store ID as a factor
sapply(mydata, class)
mydata$ID <- as.factor(mydata$ID)

# Count the NAs in each column, then drop every row that contains one
sapply(mydata, function(column) sum(is.na(column)))
mydata <- na.omit(mydata)


### Summary statistics and exploratory plots of mydata
summary(mydata)

# Mean and standard deviation of HR_corrected_10eggs for every
# Species x staining combination
species_by_staining <- list(mydata$Species, mydata$staining)
tapply(mydata$HR_corrected_10eggs, species_by_staining, mean)
tapply(mydata$HR_corrected_10eggs, species_by_staining, sd)
#se <- function(x) sqrt(var(x)/length(x))
#tapply(mydata$HR_corrected_10eggs, mydata$Species, se)

# One panel: boxplot of the response for each Species.
# The formula is written with mydata$... because the axis labels are taken
# from the formula text.
par(mfrow = c(1, 1)) # (rows, columns)
boxplot(mydata$HR_corrected_10eggs ~ mydata$Species)

# 2 x 2 panel layout for the histogram and the normality plots below
par(mfrow = c(2, 2))
# NOT USED: histogram of Survival for genotype BRO-030, with a jittered rug.
# NOTE(review): Genotype and Survival are not referenced anywhere else in this
# script - confirm that the input table actually contains them.
hist(subset(mydata, Genotype == "BRO-030")$Survival)
rug(jitter(subset(mydata, Genotype == "BRO-030")$Survival))

# Check normality and homogeneity of variance of the response
qqnorm(mydata$HR_corrected_10eggs)
qqline(mydata$HR_corrected_10eggs)
shapiro.test(mydata$HR_corrected_10eggs) # normality (small P = NOT NORMAL)
fligner.test(mydata$HR_corrected_10eggs ~ mydata$Species) # equal variances (small P = NOT EQUAL VARIANCE)


### Boxplot mydata
# library() stops with an error when ggplot2 is missing; require() would only
# return FALSE and let the script fail later at the first ggplot() call.
library(ggplot2)

# Reformat the data frame for plotting: keep the TB rows and stack the HR and
# staining columns into `value`, labelled by `class`.
options(pillar.sigfig = 5) # significant digits shown when a tibble is printed
data <- as_tibble(mydata) %>%
  filter(staining_type == "TB") %>%
  dplyr::select(Species, staining, HR) %>%
  pivot_longer(cols = c(HR, staining), names_to = "class", values_to = "value") %>%
  mutate(Species = as.character(Species))

head(data)

cbPalette <- c("Set1") # brewer palette name for the optional scale_fill_brewer() below
size <- 15 # text size (pt) shared by the axis text and axis titles

# Boxplot with the individual observations drawn on top.
# The ggplot object is not called `plot`, so base::plot() is not shadowed.
tb_boxplot <- ggplot(data, aes(x = Species, y = value, fill = class)) + # use geom_boxplot or geom_violin
  geom_boxplot(colour = "black", # black outline for boxes
               outlier.colour = NA) + # hide outliers: every point is drawn by geom_point()
  # `fill` is inherited here as well: shape 19 does not show it, but it gives
  # position_jitterdodge() the grouping it dodges by.
  geom_point(colour = "black",
             size = 3,
             shape = 19,
             # alpha = 0.3, # alpha makes points transparent
             position = position_jitterdodge(jitter.width = 0, jitter.height = 0,
                                             dodge.width = 0.75, seed = NA)) +
  # labs(x = "", y = "H202 (DAB staining in mm2)") +
  labs(x = "", y = "cell death (TB staining in mm2)") +
  scale_fill_manual(values = c("white", "gray")) + # fill colours - http://www.cookbook-r.com/Graphs/Colors_(ggplot2)/
  # scale_fill_brewer(palette = cbPalette) + # alternative fill colours
  # scale_x_discrete(limits = c("2", "0.5", "1")) + # reorder scale x and legend
  # NOTE(review): these labels are assigned by position, i.e. in the sorted
  # order of the Species values - confirm the order matches the data.
  scale_x_discrete(labels = c("B. nigra", "B. oleracea", "C. hispanica")) +
  # NOTE(review): observations outside `limits` are dropped before the boxplot
  # statistics are computed - confirm no value exceeds 5.5.
  scale_y_continuous(breaks = c(0, 1, 2, 3, 4, 5), limits = c(0, 5.5)) +
  theme_bw() + # remove gray background
  theme(axis.text.x = element_text(angle = 0, hjust = 0.5, size = size),
        axis.text.y = element_text(size = size),
        axis.title = element_text(size = size),
        # remove grid https://felixfan.github.io/ggplot2-remove-grid-background-margin/
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        # legend inside the panel; use legend.position = "none" to remove it
        # NOTE(review): recent ggplot2 releases deprecate a numeric
        # legend.position in favour of legend.position.inside - check the
        # installed version.
        legend.position = c(0.85, 0.90),
        legend.title = element_blank(),
        legend.text = element_text(colour = "black", size = (size - 5), face = "plain"), # change face for text style
        aspect.ratio = 15 / 15) # fix the x/y ratio of the plot
print(tb_boxplot)
ggsave(filename = "PLOT_NEW.png", plot = tb_boxplot, width = 6, height = 6, units = "in") # CHANGE
ggsave(filename = "Figure_TB-staining.svg", plot = tb_boxplot, width = 6, height = 6, units = "in")
dev.off() # close the device opened by print(); ggsave() manages its own devices


### Statistical analysis ###
# Attach each package with its own library() call. require() accepts a single
# package: any further names are taken as its lib.loc, quietly, ... arguments,
# so only the first package would be loaded. library() also stops immediately
# when a package is missing instead of returning FALSE.
library(multcomp)     # glht(), mcp(), cld()
library(multcompView) # multcompLetters()
library(FSA)          # dunnTest()
library(rcompanion)   # cldList(), fullPTable()
library(lsmeans)      # lsmeans()

## 1) HR vs staining ##
# Select the data
mydata <- read.table("D:/PhD/OneDrive - WageningenUR/1_Data/0_Eddie_coevolution/Image_analysis_Salome.txt",header=TRUE,sep="")

options(pillar.sigfig = 3) # significant digits shown when a tibble is printed

# Wide table of the TB rows: the `staining` and `HR` areas side by side.
tb_wide <- as_tibble(mydata) %>%
  filter(staining_type == "TB") %>%
  dplyr::select(Species, staining, HR) %>%
  mutate(Species = as.factor(Species))

# Long table: both area columns stacked into `area`, labelled by `imaging`.
# The tests and models below read `area` and `imaging`, so this reshape has to
# be done before them.
tb_long <- tb_wide %>%
  pivot_longer(cols = c(staining, HR), names_to = "imaging", values_to = "area") %>%
  mutate(imaging = as.factor(imaging))

# Two-sample comparison of the staining and HR areas within one species
focal_species <- "Crambe_hispanica"
focal_wide <- tb_wide %>% filter(Species == focal_species)
focal_long <- tb_long %>% filter(Species == focal_species)

head(focal_wide)

# unpaired t-test
shapiro.test(sqrt(focal_long$area)) # test for normality (small P = NOT NORMAL)
fligner.test(sqrt(focal_long$area) ~ focal_long$imaging) # test for equal variances (small P = NOT EQUAL VARIANCE)

t.test(sqrt(focal_wide$staining), sqrt(focal_wide$HR))
wilcox.test(sqrt(focal_wide$staining), sqrt(focal_wide$HR), alternative = "two.sided")

# NOTE(review): everything below compares Species, which needs at least two
# species, so it runs on all TB rows rather than on the single-species subset
# used for the t-test above. Confirm this is the intended data set.
mydata <- tb_long

# Check normality of the mydata
par(mfrow = c(2, 2))
qqnorm(sqrt(mydata$area))
qqline(sqrt(mydata$area))
shapiro.test(sqrt(mydata$area)) # test for normality (small P = NOT NORMAL)
fligner.test(sqrt(mydata$area) ~ mydata$Species) # test for equal variances (small P = NOT EQUAL VARIANCE)
fligner.test(sqrt(mydata$area) ~ mydata$imaging) # test for equal variances (small P = NOT EQUAL VARIANCE)


# linear model
data <- mydata
lm1 <- lm(sqrt(area) ~ Species + imaging, data = data)
plot(lm1) # diagnostic plots

summary(lm1)
anova(lm1)
AIC(lm1) # goodness-of-fit measure - smaller values are better
BIC(lm1) # goodness-of-fit measure - smaller values are better
coef(lm1) # coefficients of the model
confint(lm1) # confidence interval

# ANOVA

# multiple comparison - one factor
# (not stored as `pairs`, which would shadow graphics::pairs)
tukey_pairs <- glht(lm1, linfct = mcp(Species = "Tukey"))
summary(tukey_pairs)
cld(tukey_pairs, level = 0.05)
confint(tukey_pairs)

# multiple comparison - multiple factor (emmeans package)
# The formula must name terms of lm1: pairwise Species contrasts within each
# level of imaging.
lsmeans(lm1, pairwise ~ Species | imaging)


# non-parametric test (Anova) - https://rcompanion.org/rcompanion/d_06.html
kruskal.test(sqrt(mydata$area) ~ mydata$Species)

# pairwise comparison 1: Dunn test (NOT USED)
PT <- dunnTest(sqrt(area) ~ Species,
               data = mydata,
               method = "bh") # require "FSA", p.adjust to be used to adjust p-values
PT
PT <- PT$res

cldList(comparison = PT$Comparison, # gives weird results, not corresponding with Wilcoxon
        p.value    = PT$P.adj,
        threshold  = 0.05) # require "rcompanion"



# pairwise comparison 2: Mann-Whitney U test/Wilcoxon rank sum test
PT <- pairwise.wilcox.test(sqrt(mydata$area), mydata$Species,
                           p.adjust.method = "BH")
PT <- PT$p.value
PT1 <- round(fullPTable(PT), 3) # require "rcompanion"
PT1

multcompLetters(PT1,
                compare = "<",
                threshold = 0.05,
                Letters = letters,
                reversed = FALSE) # require "multcompView"

## 2) HR ##
# Read the image-analysis table again so this section starts from the raw data
mydata <- read.table("D:/PhD/OneDrive - WageningenUR/1_Data/0_Eddie_coevolution/Image_analysis_Salome.txt", header = TRUE, sep = "")

options(pillar.sigfig = 3) # significant digits shown when a tibble is printed

# Keep the DAB rows and the three columns used below; Species becomes a factor
data <- as_tibble(mydata) %>%
  filter(staining_type == "DAB") %>%
  dplyr::select(Species, staining, HR) %>%
  mutate(Species = as.factor(Species))

head(data)

# From here on both `mydata` and `data` hold the filtered tibble
mydata <- data

# Normality and homogeneity of variance of sqrt(HR)
par(mfrow = c(2, 2))
qqnorm(sqrt(mydata$HR))
qqline(sqrt(mydata$HR))
shapiro.test(sqrt(mydata$HR)) # normality (small P = NOT NORMAL)
fligner.test(sqrt(mydata$HR) ~ mydata$Species) # equal variances (small P = NOT EQUAL VARIANCE)

# Linear model: sqrt(HR) explained by Species
lm1 <- lm(formula = sqrt(HR) ~ Species, data = data)
plot(lm1)

summary(lm1)
anova(lm1)
AIC(lm1) # goodness-of-fit measure - smaller values are better
BIC(lm1) # goodness-of-fit measure - smaller values are better
coef(lm1) # coefficients of the model
confint(lm1) # confidence interval

# Tukey all-pairs comparison of the Species levels
pairs <- glht(lm1, linfct = mcp(Species = "Tukey"))
summary(pairs)
cld(pairs, level = 0.05)
confint(pairs)



# Non-parametric alternative to the ANOVA - https://rcompanion.org/rcompanion/d_06.html
kruskal.test(sqrt(mydata$HR) ~ mydata$Species)

# Pairwise comparison 1: Dunn test (NOT USED)
PT <- dunnTest(sqrt(mydata$HR) ~ mydata$Species,
               data = mydata,
               method = "bh") # require "FSA", p.adjust to be used to adjust p-values
PT
PT <- PT[["res"]]

cldList(comparison = PT[["Comparison"]], # gives weird results, not corresponding with Wilcoxon
        p.value    = PT[["P.adj"]],
        threshold  = 0.05) # require "rcompanion"



# Pairwise comparison 2: Mann-Whitney U test/Wilcoxon rank sum test
PT <- pairwise.wilcox.test(sqrt(mydata$HR), mydata$Species,
                           p.adjust.method = "BH")
PT <- PT[["p.value"]]
PT1 <- round(fullPTable(PT), 3) # require "rcompanion"
PT1

multcompLetters(PT1,
                compare = "<",
                threshold = 0.05,
                Letters = letters,
                reversed = FALSE) # require "multcompView"

## 3) Staining ##
# Select the data
mydata <- read.table("D:/PhD/OneDrive - WageningenUR/1_Data/0_Eddie_coevolution/Image_analysis_Salome.txt",header=TRUE,sep="")

options(pillar.sigfig = 3) # significant digits shown when a tibble is printed

# Keep the DAB rows and the columns used below; Species becomes a factor
data <- as_tibble(mydata) %>%
  filter(staining_type == "DAB") %>%
  dplyr::select(Species, staining, HR) %>%
  mutate(Species = as.factor(Species))

head(data)
mydata <- data

# Check normality of the mydata
par(mfrow = c(2, 2))
qqnorm(sqrt(mydata$staining))
qqline(sqrt(mydata$staining))
shapiro.test(sqrt(mydata$staining)) # test for normality (small P = NOT NORMAL)
fligner.test(sqrt(mydata$staining) ~ mydata$Species) # test for equal variances (small P = NOT EQUAL VARIANCE)

# linear model: sqrt(staining) explained by Species
lm1 <- lm(sqrt(staining) ~ Species, data)
plot(lm1)

summary(lm1)
anova(lm1)
AIC(lm1) # goodness-of-fit measure - smaller values are better
BIC(lm1) # goodness-of-fit measure - smaller values are better
coef(lm1) # coefficients of the model
confint(lm1) # confidence interval

# multiple comparison: Tukey all-pairs contrasts between Species
pairs <- glht(lm1, linfct = mcp(Species = "Tukey"))
summary(pairs)
cld(pairs, level = 0.05)
confint(pairs)



# non-parametric test (Anova) - https://rcompanion.org/rcompanion/d_06.html
kruskal.test(sqrt(mydata$staining) ~ mydata$Species)

# pairwise comparison 1: Dunn test (NOT USED)
PT <- dunnTest(sqrt(mydata$staining) ~ mydata$Species,
               data = mydata,
               method = "bh") # require "FSA", p.adjust to be used to adjust p-values
PT
PT <- PT$res

cldList(comparison = PT$Comparison, # gives weird results, not corresponding with Wilcoxon
        p.value    = PT$P.adj,
        threshold  = 0.05) # require "rcompanion"



# pairwise comparison 2: Mann-Whitney U test/Wilcoxon rank sum test
# The response is `staining`, the same variable as in every other test of this
# section (it must not be HR, which is analysed in section 2).
PT <- pairwise.wilcox.test(sqrt(mydata$staining), mydata$Species,
                           p.adjust.method = "BH")
PT <- PT$p.value
PT1 <- round(fullPTable(PT), 3) # require "rcompanion"
PT1

multcompLetters(PT1,
                compare = "<",
                threshold = 0.05,
                Letters = letters,
                reversed = FALSE) # require "multcompView"