This HTML document contains the R-code that was used to perform analyses and create figures for the corresponding manuscript. At the end of the document the R version and package versions can be found. Should one want to run this R project in R-studio, it is advised to use the same R version and package versions as have been used here.

1. Load R packages

#Warnings and messages have been inactivated for this markdown
library(phyloseq)
library(microbiome)
library(microbiomeutilities)
library(ggplot2)
library(ggpubr)
library(plyr)
library(dplyr)
library(ape)
library(reshape2)
library(scales)
library(knitr)
library(ggrepel)
library(nlme)
library(lme4)
library(sciplot)
library(apeglm)
library(pheatmap)
library(decontam)
library(Hmisc)
library(picante)
library(emmeans)
library(multcomp)
library(multcompView)
library(data.table) 
library(purrr) 
library(viridis) 
library(RColorBrewer)
library(ALDEx2)
library(compositions)
library(zCompositions)
library(CoDaSeq)
library(propr)
library(vegan)
library(DT)
library(DESeq2)
library(psych)
library(car)
library(ggvegan)
library(metamicrobiomeR)
library(corrplot)
library(tidyverse)
library(rstatix)
library(datarium)

2. Directory structure

# Create Folders as following

# Figures 
dir.create("figures")  

# Phyloseq objects  
dir.create("phyobjects")  

# Phyloseq objects  
dir.create("output_data")  

3. Data input and subsetting

3.1. Loading data

Loading Illumina sequencing data of library 1-14. Data was run through NGTax 2.0 and SILVA database version 132 was used as reference database.

Create phyloseq object

# create phyloseq object of all libraries
ps <- read_phyloseq(otu.file = "./input_data/Galaxy58-[NG-Tax__porcine_study_all_samples_eightrun_length70bp_11march2020].biom1", 
                    taxonomy.file = NULL, 
                    metadata.file = "./input_data/Metadata_porcine_study_june_2020_y_glucan.csv", 
                    type = "biom")
## Time to complete depends on OTU file size
# create tree object
treefile <- read.tree("./input_data/all_otus_eightrun_length70bp_11march2020.tre")
# merge tree and phyloseq object
ps1.all <- merge_phyloseq(ps,treefile)
print(ps1.all)
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 4157 taxa and 482 samples ]
## sample_data() Sample Data:       [ 482 samples by 115 sample variables ]
## tax_table()   Taxonomy Table:    [ 4157 taxa by 6 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 4157 tips and 4156 internal nodes ]
# export taxonomy table (for Windows)
taxonomy_table_ps1 <- as.data.frame(ps1.all@tax_table)
write.csv(taxonomy_table_ps1, file = "./output_data/taxonomy_table_ps1.csv", fileEncoding = "UTF-16LE") 
ntaxa(ps1.all)
## [1] 4157

Remove pattern in taxa names

# remove pattern in taxa names
tax_table(ps1.all)[,colnames(tax_table(ps1.all))] <- gsub(tax_table(ps1.all)[,colnames(tax_table(ps1.all))],pattern="[a-z]__",replacement="")

Export phyloseq object

# this is the raw, unfiltered phyloseq file
saveRDS(ps1.all, "./phyobjects/ps1_all.rds")
ps1.all <- readRDS("./phyobjects/ps1_all.rds")

# create new object to work with
ps1 <- ps1.all

3.2. Cleaning data

Removing mitochondrial and chloroplast reads

# replace empty fields by "Unmatched_level" 
# this is needed because otherwise the microbiome::aggregate_taxa() function will not run
tax_table(ps1)[tax_table(ps1)[,"Phylum"]== "","Phylum"] <- "Unmatched_phylum"
tax_table(ps1)[tax_table(ps1)[,"Class"]== "","Class"] <- "Unmatched_class"
tax_table(ps1)[tax_table(ps1)[,"Order"]== "","Order"] <- "Unmatched_order"
tax_table(ps1)[tax_table(ps1)[,"Family"]== "","Family"] <- "Unmatched_family"
tax_table(ps1)[tax_table(ps1)[,"Genus"]== "","Genus"] <- "Unmatched_genus"

# remove mitochondria and chloroplasts
ps1 <- subset_taxa(ps1, Family != "Mitochondria")
ps1 <- subset_taxa(ps1, Class != "Chloroplast") #No Chloroplasts found in class Chloroplasts

ps1.mitoch <- subset_taxa(ps1.all, Family == "Mitochondria")
ps1.mitoch <- prune_samples(sample_sums(ps1.mitoch)>0,ps1.mitoch) 
sort(sample_sums(ps1.mitoch))
## m2195 m3035 m1338 m1315 m3041 m2199 m3073 m1832 m3012 m3032 m1318 m3022 m3016 
##   163   188   198   202   242   248   278   464   829  1440  2110  2603  3542 
## m1339 m3037 m3042 m3031 m3040 m1329 m3015 m2194 m3011 
##  4010  4951  5456  6973  9682 15198 21652 33679 34331
# There are however Oxyphotobacteria (Dutch translation: blauwalgen) present in the dataset, which are not removed from the dataset:
ps1.chloro <- subset_taxa(ps1.all, Order == "Chloroplast")
ps1.chloro <- prune_samples(sample_sums(ps1.chloro)>0,ps1.chloro) 
sort(sample_sums(ps1.chloro))
##  water.blank.9 water.blank.13          m2209          m2199          m2205 
##              2             36            103            142            298 
##          m1398          m2183          m1832          m3035          m1317 
##            309            333            446            483            517 
##          m1356          m1333          m3041          m3026          m3073 
##            546            607            700            725            726 
##          m3020          m2235          m2232          m1318          m3016 
##            956           1070           1208           2832           3218 
##          m3042          m3012          m3032          m1339          m3022 
##           3448           4065           5062           5465           7845 
##          m3011          m3031          m3015          m3040          m3037 
##           8747          11367          11572          11760          18781 
##          m1329          m2194 
##          46379          49513
print(ps1)
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 4123 taxa and 482 samples ]
## sample_data() Sample Data:       [ 482 samples by 115 sample variables ]
## tax_table()   Taxonomy Table:    [ 4123 taxa by 6 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 4123 tips and 4122 internal nodes ]
ntaxa(ps1.all)-ntaxa(ps1) # 34 ASVs removed which were belonging to mitochondria
## [1] 34

3.3. Decontaminating dataset

Remove contaminant reads, identified by visual inspection of each ASV individually using correlation plots [DNA_reading] vs rel. ASV abundance. Resulted in 8 ASVs identified as contaminants.

# remove samples that have no reads. Does not do anything as in this dataset there are reads in every sample:
psx <- prune_samples(sample_sums(ps1)>0,ps1) 

# add higher taxa names
psx.tax <- as.data.frame(psx@tax_table)
psx.tax$OTU <- rownames(psx.tax)

# import contaminant OTU table
visContam <- read.delim("./input_data/Contaminant_OTU_by_visual_identification.txt",header=T)

psx.tax2 <- psx.tax 
rownames(psx.tax2) <- NULL # reset rownames for subset based on index
psx.tax2$contam <- rownames(psx.tax2) %in% visContam$OTU
table(psx.tax2$contam) # works: 8 OTUs T, rest F
## 
## FALSE  TRUE 
##  4115     8
rownames(psx.tax2) <- psx.tax2$OTU # restore OTU as rownames
psx.tax2$reads <- taxa_sums(psx)

# calculate % contaminant reads
contam.otu2 <- subset(psx.tax2, contam == "TRUE")
(sum(contam.otu2$reads) / sum(abundances(psx)))
## [1] 9.182982e-05
# 0.0092% of all reads

# subset phyloseq to contaminant OTUs
psx.sub <- subset_taxa(psx, psx.tax2$contam)

ps1.decontam <- prune_taxa(!psx.tax2$contam, ps1) 
ps1.decontam
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 4115 taxa and 482 samples ]
## sample_data() Sample Data:       [ 482 samples by 115 sample variables ]
## tax_table()   Taxonomy Table:    [ 4115 taxa by 6 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 4115 tips and 4114 internal nodes ]
ps1.decontam <- prune_samples(sample_sums(ps1.decontam)>0, ps1.decontam)
ps1.decontam # 4115 taxa, 482 samples
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 4115 taxa and 481 samples ]
## sample_data() Sample Data:       [ 481 samples by 115 sample variables ]
## tax_table()   Taxonomy Table:    [ 4115 taxa by 6 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 4115 tips and 4114 internal nodes ]
# samples with 0 reads after removing contaminants: 1 (water.blank.10)
# (code in following 2 lines only runs if not running prune_samples command above, used to identify removed samples)
#sample0 <- subset_samples(ps1.decontam, sample_sums(ps1.decontam)==0)
#sample_data(sample0)

ntaxa(ps1)-ntaxa(ps1.decontam)
## [1] 8
# 8 OTUs removed (resulting from the visual identification of contaminants)

sum(abundances(ps1)) - sum(abundances(ps1.decontam))
## [1] 8244
# 8244 reads removed.

3.4. Export phyloseq objects

# dataset without mitochondrial/chloroplast reads, but with contaminants
saveRDS(ps1, "./phyobjects/ps1.rds")
ps1 <- readRDS("./phyobjects/ps1.rds")

# decontaminated dataset
saveRDS(ps1.decontam, "./phyobjects/ps1.decontam.rds")
ps1.decontam <- readRDS("./phyobjects/ps1.decontam.rds")

3.5. Subsetting

# all following objects are without contaminant ASVs:
ps1.unique <- subset_samples(ps1.decontam, Unique == "yes")
ps1.noROM <- subset_samples(ps1.unique, Treatment != "ROM")
ps1.glucan <- subset_samples(ps1.noROM, Treatment != "EcN")
ps1.glucan <- prune_taxa(taxa_sums(otu_table(ps1.glucan))>0, ps1.glucan)
ps1.faeces <- subset_samples(ps1.glucan, Origin == "faeces")
ps1.faeces <- prune_taxa(taxa_sums(otu_table(ps1.faeces))>0, ps1.faeces)
ps1.faeces.pre <- subset_samples(ps1.faeces, Pre_or_post_weaning == "Pre_weaning")
ps1.faeces.pre <- prune_taxa(taxa_sums(otu_table(ps1.faeces.pre))>0, ps1.faeces.pre)
ps1.faeces.post <- subset_samples(ps1.faeces, Pre_or_post_weaning == "Post_weaning")
ps1.faeces.post <- prune_taxa(taxa_sums(otu_table(ps1.faeces.post))>0, ps1.faeces.post)
ps1.digesta <- subset_samples(ps1.glucan, Origin %in% c("jejunum_digesta", "ileum_digesta", "caecum_digesta"))
ps1.digesta <- prune_taxa(taxa_sums(otu_table(ps1.digesta))>0, ps1.digesta)
ps1.jejunum.digesta <- subset_samples(ps1.glucan, Origin %in% c("jejunum_digesta"))
ps1.jejunum.digesta <- prune_taxa(taxa_sums(otu_table(ps1.jejunum.digesta))>0, ps1.jejunum.digesta)
ps1.ileum.digesta <- subset_samples(ps1.glucan, Origin %in% c("ileum_digesta"))
ps1.ileum.digesta <- prune_taxa(taxa_sums(otu_table(ps1.ileum.digesta))>0, ps1.ileum.digesta)
ps1.caecum.digesta <- subset_samples(ps1.glucan, Origin %in% c("caecum_digesta"))
ps1.caecum.digesta <- prune_taxa(taxa_sums(otu_table(ps1.caecum.digesta))>0, ps1.caecum.digesta)
ps1.swabs <- subset_samples(ps1.faeces, Swab %in% c("swab"))
ps1.swabs <- prune_taxa(taxa_sums(otu_table(ps1.swabs))>0, ps1.swabs)
ps1.no.swabs <- subset_samples(ps1.faeces, Swab %in% c("no_swab"))
ps1.no.swabs <- prune_taxa(taxa_sums(otu_table(ps1.no.swabs))>0, ps1.no.swabs)
print(ps1.faeces) # 287 samples
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 2292 taxa and 287 samples ]
## sample_data() Sample Data:       [ 287 samples by 115 sample variables ]
## tax_table()   Taxonomy Table:    [ 2292 taxa by 6 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 2292 tips and 2291 internal nodes ]
# filter out archaea and place in phyloseq object
ps1.archaea <- subset_taxa(ps1.glucan, Phylum == "Euryarchaeota")
ps1.archaea
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 22 taxa and 421 samples ]
## sample_data() Sample Data:       [ 421 samples by 115 sample variables ]
## tax_table()   Taxonomy Table:    [ 22 taxa by 6 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 22 tips and 21 internal nodes ]

3.6. Subsetting for quality checks

#   negative controls - includes contaminant reads (ps1)
ps1.contr <- subset_samples(ps1, Origin %in% c("blank"))
ps1.contr <- prune_taxa(taxa_sums(otu_table(ps1.contr))>0, ps1.contr)
print(ps1.contr)
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 202 taxa and 14 samples ]
## sample_data() Sample Data:       [ 14 samples by 115 sample variables ]
## tax_table()   Taxonomy Table:    [ 202 taxa by 6 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 202 tips and 201 internal nodes ]
# positive controls (mocks) - with contaminant reads (ps1)
ps1.mock <- subset_samples(ps1, Origin %in% c("mock.3","mock.4"))
ps1.mock <- prune_taxa(taxa_sums(otu_table(ps1.mock))>0, ps1.mock)
print(ps1.mock)
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 115 taxa and 28 samples ]
## sample_data() Sample Data:       [ 28 samples by 115 sample variables ]
## tax_table()   Taxonomy Table:    [ 115 taxa by 6 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 115 tips and 114 internal nodes ]
# technical duplicates (PCR) - includes contaminant reads (ps1)
ps1.tech <- subset_samples(ps1, Description %in% c("m1858", "m1858R1","m1858R2","m1858R3", "m2158", "m2158R1", "m2158R2", "m2158R3", "m169", "m169R1", "m169R2", "m169R3", "m169R4", "m169R5", "m169R6", "m924", "m924R1", "m924R2", "m924R3", "m924R4", "m924R5", "m924R6"))
ps1.tech <- prune_taxa(taxa_sums(otu_table(ps1.tech))>0, ps1.tech)
print(ps1.tech)
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 345 taxa and 22 samples ]
## sample_data() Sample Data:       [ 22 samples by 115 sample variables ]
## tax_table()   Taxonomy Table:    [ 345 taxa by 6 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 345 tips and 344 internal nodes ]
#   subset mitochondrial sequences
ps1.mit <- subset_taxa(ps1.all, Family == "Mitochondria")
print(ps1.mit)
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 34 taxa and 482 samples ]
## sample_data() Sample Data:       [ 482 samples by 115 sample variables ]
## tax_table()   Taxonomy Table:    [ 34 taxa by 6 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 34 tips and 33 internal nodes ]
# subset chloroplast sequences
#ps1.chloro <- subset_taxa(ps1.all, Class == "Chloroplast") #does not run, since there do not seem to be chloroplast reads present

3.7. Save subsets

Saving subsets in compressed formats (RDS).

# with contaminants
saveRDS(ps1.decontam, "./phyobjects/ps1.decontam.rds")
saveRDS(ps1.unique, "./phyobjects/ps1.unique.rds")
saveRDS(ps1.all, "./phyobjects/ps1.all.rds")
saveRDS(ps1.tech, "./phyobjects/ps1.tech.rds")
saveRDS(ps1.contr, "./phyobjects/ps1.contr.rds")
saveRDS(ps1.mock, "./phyobjects/ps1.mock.rds")
saveRDS(ps1.mit, "./phyobjects/ps1.mit.rds")
saveRDS(ps1.faeces, "./phyobjects/ps1.faeces.rds")
saveRDS(ps1.faeces.pre, "./phyobjects/ps1.faeces.pre.rds")
saveRDS(ps1.faeces.post, "./phyobjects/ps1.faeces.post.rds")
saveRDS(ps1.glucan, "./phyobjects/ps1.glucan.rds")
saveRDS(ps1.digesta, "./phyobjects/ps1.digesta.rds")
saveRDS(ps1.jejunum.digesta, "./phyobjects/ps1.jejunum.digesta.rds")
saveRDS(ps1.ileum.digesta, "./phyobjects/ps1.ileum.digesta.rds") 
saveRDS(ps1.caecum.digesta, "./phyobjects/ps1.caecum.digesta.rds")
saveRDS(ps1.swabs, "./phyobjects/ps1.swabs.rds")
saveRDS(ps1.no.swabs, "./phyobjects/ps1.no.swabs.rds")
saveRDS(ps1.archaea, "./phyobjects/ps1.archaea.rds")

# load full datasets from RDS
ps1.all <- readRDS("./phyobjects/ps1.all.rds")

3.8. Exploring specs

Exploring the stats of the datasets: # reads, ASVs, nr of genera, etc.

sum(sample_sums(ps1.all)) # raw phyloseq
## [1] 89923196
sum(sample_sums(ps1)) # excl mitrochondria/chloroplast, incl contaminants
## [1] 89774757
sum(sample_sums(ps1.decontam)) # excl mitrochondria/chloroplast, excl contaminants
## [1] 89766513
print(ps1)
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 4123 taxa and 482 samples ]
## sample_data() Sample Data:       [ 482 samples by 115 sample variables ]
## tax_table()   Taxonomy Table:    [ 4123 taxa by 6 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 4123 tips and 4122 internal nodes ]
ntaxa(aggregate_taxa(ps1,"Genus"))
## [1] 429
colnames(tax_table(ps1))
## [1] "Domain" "Phylum" "Class"  "Order"  "Family" "Genus"

4. Alpha diversity analyses

4.1. Loading data for plots

ps1.faeces <- readRDS("./phyobjects/ps1.faeces.rds") #all faecal samples

# this line is used to make sure the order of days is chronological:
ps1.faeces@sam_data$Day_of_study_factor <- factor(ps1.faeces@sam_data$Day_of_study_factor, levels = c("Day_4", "Day_8", "Day_14", "Day_26", "Day_27", "Day_35", "Day_43", "Day_44", "Day_59", "Day_69", "Day_70")) 
# this line is used to make sure pre-weaning is followed by post-weaning:
ps1.faeces@sam_data$Pre_or_post_weaning <- factor(ps1.faeces@sam_data$Pre_or_post_weaning, levels = c("Pre-weaning", "Post-weaning")) 

ps1.digesta <- readRDS("./phyobjects/ps1.digesta.rds") #all digesta samples
# this line is used to reorder segments according order luminal content passages through the GI tract of the animals:
ps1.digesta@sam_data$Origin <- factor(ps1.digesta@sam_data$Origin, levels = c("jejunum_digesta", "ileum_digesta", "caecum_digesta")) 
print(ps1.digesta) #contains 1608 taxa in 134 samples
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 1608 taxa and 134 samples ]
## sample_data() Sample Data:       [ 134 samples by 115 sample variables ]
## tax_table()   Taxonomy Table:    [ 1608 taxa by 6 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 1608 tips and 1607 internal nodes ]
ps1.glucan <- readRDS("./phyobjects/ps1.glucan.rds") #all faecal and digestal samples of the current study

# this line is used to combine faeces and digesta in an orderly fashion for use in the alpha-diversity plots:
ps1.glucan@sam_data$Origin_Day <- factor(ps1.glucan@sam_data$Origin_Day, levels = c("faeces_Day_4", "faeces_Day_8", "faeces_Day_14", "faeces_Day_26", "faeces_Day_35", "faeces_Day_43", "faeces_Day_59", "faeces_Day_69", "jejunum_digesta_Day_27", "jejunum_digesta_Day_44", "jejunum_digesta_Day_70", "ileum_digesta_Day_27", "ileum_digesta_Day_44", "ileum_digesta_Day_70", "caecum_digesta_Day_27", "caecum_digesta_Day_44", "caecum_digesta_Day_70")) 

4.2. Plotting

p <- plot_richness(ps1.faeces, "Day_of_study_factor", color = "Treatment", measures = "Shannon")
## Warning in estimate_richness(physeq, split = TRUE, measures = measures): The data you have provided does not have
## any singletons. This is highly suspicious. Results of richness
## estimates (for example) are probably unreliable, or wrong, if you have already
## trimmed low-abundance taxa from the data.
## 
## We recommended that you find the un-trimmed data and retry.
p <- p + geom_boxplot(outlier.shape = NA, aes(fill = "Day_of_study")) + scale_fill_manual(values = c("white", "#5F7FC7", "orange","#DA5724", "#508578"))
p <- p + geom_point(position=position_jitterdodge(), alpha=1, size=1.5) #+ theme_bw()
p$layers <- p$layers[-1]
print (p) #this plot is created for illustrative purposes, and is not saved or used in the manuscript

p <- plot_richness(ps1.digesta, "Day_of_study_factor", color = "Treatment", measures = "Shannon")
## Warning in estimate_richness(physeq, split = TRUE, measures = measures): The data you have provided does not have
## any singletons. This is highly suspicious. Results of richness
## estimates (for example) are probably unreliable, or wrong, if you have already
## trimmed low-abundance taxa from the data.
## 
## We recommended that you find the un-trimmed data and retry.
p <- p + geom_boxplot(outlier.shape = NA, aes(fill = "Day_of_study")) + scale_fill_manual(values = c("white", "#5F7FC7", "orange","#DA5724", "#508578"))
p <- p + geom_point(position=position_jitterdodge(), alpha=1, size=1.5)
p <- p + facet_wrap("Origin")
p$layers <- p$layers[-1]
print (p) #this plot is created for illustrative purposes, and is not saved or used in the manuscript

# following figures combine the faeces and digesta alpha diversities
p <- plot_richness(ps1.glucan, "Origin_Day", color = "Treatment", measures = "Shannon")
## Warning in estimate_richness(physeq, split = TRUE, measures = measures): The data you have provided does not have
## any singletons. This is highly suspicious. Results of richness
## estimates (for example) are probably unreliable, or wrong, if you have already
## trimmed low-abundance taxa from the data.
## 
## We recommended that you find the un-trimmed data and retry.
p <- p + geom_boxplot(outlier.shape = NA, aes(fill = "Origin_Day")) + scale_fill_manual(values = c("white", "#5F7FC7", "orange","#DA5724", "#508578"))
p <- p + geom_point(position=position_jitterdodge(), alpha = .9, size=2)
p$layers <- p$layers[-1]
#The final figure can be found under panel B in Figure 2 of the manuscript
print(p)

ggsave("./figures/Alpha_diversity_Shannon.pdf", height = 7, width = 11)

p <- plot_richness(ps1.glucan, "Origin_Day", color = "Treatment", measures = "InvSimpson")
## Warning in estimate_richness(physeq, split = TRUE, measures = measures): The data you have provided does not have
## any singletons. This is highly suspicious. Results of richness
## estimates (for example) are probably unreliable, or wrong, if you have already
## trimmed low-abundance taxa from the data.
## 
## We recommended that you find the un-trimmed data and retry.
p <- p + geom_boxplot(outlier.shape = NA, aes(fill = "Origin_Day")) + scale_fill_manual(values = c("white", "#5F7FC7", "orange","#DA5724", "#508578"))
p <- p + geom_point(position=position_jitterdodge(), alpha = .9, size=2)
p$layers <- p$layers[-1]
#The final figure can be found under panel A in Figure 2 of the manuscript
print(p)

ggsave("./figures/Alpha_diversity_InvSimpson.pdf", height = 7, width = 11)

4.3. Loading data for LME Models

Loading data for LME (Linear Mixed-Effects models)

ps1 <- readRDS("./phyobjects/ps1.glucan.rds")

ps1@sam_data$Day_of_study_factor <- factor(ps1@sam_data$Day_of_study_factor, levels = c("Day_4", "Day_8", "Day_14", "Day_26", "Day_27", "Day_35", "Day_43", "Day_44", "Day_59", "Day_69", "Day_70"))
ps1@sam_data$Pre_or_post_weaning <- factor(ps1@sam_data$Pre_or_post_weaning, levels = c("Pre_weaning", "Post_weaning"))

#ps1 <- tax_glom(ps1, "Genus") One can aggregate ASVs to genus level to facilitate genus-level analysis. It was however chosen to work with ASV-level data for alpha diversity analyses so this line is not used.

ps1.fcs <- subset_samples(ps1, Origin %in% c("faeces"))
ps1.fcs.pre <- subset_samples(ps1.fcs, Pre_or_post_weaning %in% c("Pre_weaning"))
ps1.jej <- subset_samples(ps1, Origin %in% c("jejunum_digesta"))
ps1.ile <- subset_samples(ps1, Origin %in% c("ileum_digesta"))
ps1.cae <- subset_samples(ps1, Origin %in% c("caecum_digesta"))

ps1.fcs <- prune_taxa(taxa_sums(otu_table(ps1.fcs))>0, ps1.fcs)
ps1.fcs.pre <- prune_taxa(taxa_sums(otu_table(ps1.fcs.pre))>0, ps1.fcs.pre)
ps1.jej <- prune_taxa(taxa_sums(otu_table(ps1.jej))>0, ps1.jej)
ps1.ile <- prune_taxa(taxa_sums(otu_table(ps1.ile))>0, ps1.ile)
ps1.cae <- prune_taxa(taxa_sums(otu_table(ps1.cae))>0, ps1.cae)

ps1.fcs.r <- microbiome::transform(ps1.fcs, "compositional")
ps1.fcs.pre.r <- microbiome::transform(ps1.fcs.pre, "compositional")
ps1.jej.r <- microbiome::transform(ps1.jej, "compositional")
ps1.ile.r <- microbiome::transform(ps1.ile, "compositional")
ps1.cae.r <- microbiome::transform(ps1.cae, "compositional")

# input for alpha diversity
ps1.otu <- as.data.frame(ps1.fcs.r@otu_table)
ps1.tree <- ps1.fcs.r@phy_tree

ps1.pre.otu <- as.data.frame(ps1.fcs.pre.r@otu_table)
ps1.pre.tree <- ps1.fcs.pre.r@phy_tree

ps1.jej.otu <- as.data.frame(ps1.jej.r@otu_table)
ps1.jej.tree <- ps1.jej.r@phy_tree

ps1.ile.otu <- as.data.frame(ps1.ile.r@otu_table)
ps1.ile.tree <- ps1.ile.r@phy_tree

ps1.cae.otu <- as.data.frame(ps1.cae.r@otu_table)
ps1.cae.tree <- ps1.cae.r@phy_tree

4.4. Calculate alpha diversity

# calculate Shannon and InvSimpson alpha diversity and add metadata
ad.df <- data.frame("Description" = colnames(ps1.otu))

ad.df$Shannon <- estimate_richness(ps1.fcs)$Shannon
## Warning in estimate_richness(ps1.fcs): The data you have provided does not have
## any singletons. This is highly suspicious. Results of richness
## estimates (for example) are probably unreliable, or wrong, if you have already
## trimmed low-abundance taxa from the data.
## 
## We recommended that you find the un-trimmed data and retry.
ad.df$InvSimpson <- estimate_richness(ps1.fcs)$InvSimpson
## Warning in estimate_richness(ps1.fcs): The data you have provided does not have
## any singletons. This is highly suspicious. Results of richness
## estimates (for example) are probably unreliable, or wrong, if you have already
## trimmed low-abundance taxa from the data.
## 
## We recommended that you find the un-trimmed data and retry.
ad.df <- merge(meta(ps1.fcs.r), ad.df, by = "Description", all.x=T)

ad.df <- ad.df

4.5. Analysis on all faecal samples

# mixed model: random term and variance structure
lme.pd1 <- lme(Shannon ~ Day_of_study_factor * Treatment,
              data = ad.df, method = "REML",
              random = ~ 1 | Ear_tag)

lme.pd2 <- lme(InvSimpson ~ Day_of_study_factor * Treatment,
              data = ad.df, method = "REML",
              random = ~ 1 | Ear_tag)

# residual plots to visually inspect normality assumption
plot(resid(lme.pd1, method= "pearson")~ad.df$Day_of_study_factor); abline(0,0)

hist(resid(lme.pd1, method= "pearson"), breaks=30, col="grey")

qqnorm(lme.pd1, ∼ranef (.))

qqnorm(lme.pd1)

plot(resid(lme.pd2, method= "pearson")~ad.df$Day_of_study_factor); abline(0,0)

hist(resid(lme.pd2, method= "pearson"), breaks=30, col="grey")

qqnorm(lme.pd2, ∼ranef (.))

qqnorm(lme.pd2)

# model output
aov.pd.ls1 <- anova(lme.pd1)
aov.pd.ls1
##                               numDF denDF   F-value p-value
## (Intercept)                       1   225 23680.734  <.0001
## Day_of_study_factor               7   225    26.781  <.0001
## Treatment                         1    46     2.601  0.1136
## Day_of_study_factor:Treatment     7   225     1.465  0.1807
aov.pd.ls2 <- anova(lme.pd2)
aov.pd.ls2
##                               numDF denDF   F-value p-value
## (Intercept)                       1   225 1207.4831  <.0001
## Day_of_study_factor               7   225   14.0901  <.0001
## Treatment                         1    46    0.2810  0.5986
## Day_of_study_factor:Treatment     7   225    0.9598  0.4615
# test differences between timepoints - Shannon diversity
res.aov2 <- aov(Shannon ~ Day_of_study_factor, data = ad.df)
summary(res.aov2)
##                      Df Sum Sq Mean Sq F value Pr(>F)    
## Day_of_study_factor   7  30.24   4.320   26.32 <2e-16 ***
## Residuals           279  45.79   0.164                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(res.aov2, which = "Day_of_study_factor")
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Shannon ~ Day_of_study_factor, data = ad.df)
## 
## $Day_of_study_factor
##                      diff         lwr       upr     p adj
## Day_8-Day_4    0.44294102  0.19039591 0.6954861 0.0000049
## Day_14-Day_4   0.56482556  0.31228045 0.8173707 0.0000000
## Day_26-Day_4   0.78572442  0.53183953 1.0396093 0.0000000
## Day_35-Day_4   0.99917659  0.71682258 1.2815306 0.0000000
## Day_43-Day_4   0.81884184  0.53648782 1.1011959 0.0000000
## Day_59-Day_4   0.90620138  0.54904866 1.2633541 0.0000000
## Day_69-Day_4   0.95042359  0.59327087 1.3075763 0.0000000
## Day_14-Day_8   0.12188454 -0.13066057 0.3744297 0.8207226
## Day_26-Day_8   0.34278340  0.08889851 0.5966683 0.0012643
## Day_35-Day_8   0.55623558  0.27388156 0.8385896 0.0000002
## Day_43-Day_8   0.37590082  0.09354680 0.6582548 0.0015870
## Day_59-Day_8   0.46326036  0.10610764 0.8204131 0.0023791
## Day_69-Day_8   0.50748257  0.15032985 0.8646353 0.0005240
## Day_26-Day_14  0.22089886 -0.03298602 0.4747837 0.1404516
## Day_35-Day_14  0.43435104  0.15199702 0.7167051 0.0001110
## Day_43-Day_14  0.25401628 -0.02833774 0.5363703 0.1130279
## Day_59-Day_14  0.34137582 -0.01577690 0.6985285 0.0726414
## Day_69-Day_14  0.38559803  0.02844531 0.7427508 0.0241359
## Day_35-Day_26  0.21345218 -0.07010080 0.4970052 0.2978287
## Day_43-Day_26  0.03311742 -0.25043556 0.3166704 0.9999645
## Day_59-Day_26  0.12047696 -0.23762437 0.4785783 0.9699513
## Day_69-Day_26  0.16469917 -0.19340216 0.5228005 0.8546363
## Day_43-Day_35 -0.18033476 -0.48963809 0.1289686 0.6337445
## Day_59-Day_35 -0.09297522 -0.47179289 0.2858425 0.9953216
## Day_69-Day_35 -0.04875301 -0.42757067 0.3300647 0.9999314
## Day_59-Day_43  0.08735954 -0.29145813 0.4661772 0.9968286
## Day_69-Day_43  0.13158175 -0.24723592 0.5103994 0.9641856
## Day_69-Day_59  0.04422221 -0.39319875 0.4816432 0.9999868
# test differences between timepoints - InvSimpson diversity
res.aov2 <- aov(InvSimpson ~ Day_of_study_factor, data = ad.df)
summary(res.aov2)
##                      Df Sum Sq Mean Sq F value   Pr(>F)    
## Day_of_study_factor   7  11186  1598.0   14.08 1.24e-15 ***
## Residuals           279  31674   113.5                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(res.aov2, which = "Day_of_study_factor")
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = InvSimpson ~ Day_of_study_factor, data = ad.df)
## 
## $Day_of_study_factor
##                     diff         lwr       upr     p adj
## Day_8-Day_4    4.7395735  -1.9027891 11.381936 0.3675634
## Day_14-Day_4   7.3851438   0.7427813 14.027506 0.0176412
## Day_26-Day_4  15.1672396   8.4896388 21.844840 0.0000000
## Day_35-Day_4  19.0202808  11.5938937 26.446668 0.0000000
## Day_43-Day_4  11.9315698   4.5051828 19.357957 0.0000428
## Day_59-Day_4  16.0014942   6.6077750 25.395213 0.0000105
## Day_69-Day_4  13.6824451   4.2887260 23.076164 0.0003310
## Day_14-Day_8   2.6455703  -3.9967922  9.287933 0.9265954
## Day_26-Day_8  10.4276661   3.7500653 17.105267 0.0000806
## Day_35-Day_8  14.2807073   6.8543203 21.707094 0.0000003
## Day_43-Day_8   7.1919964  -0.2343907 14.618383 0.0654429
## Day_59-Day_8  11.2619207   1.8682016 20.655640 0.0071690
## Day_69-Day_8   8.9428717  -0.4508475 18.336591 0.0749470
## Day_26-Day_14  7.7820958   1.1044950 14.459697 0.0102256
## Day_35-Day_14 11.6351370   4.2087500 19.061524 0.0000751
## Day_43-Day_14  4.5464261  -2.8799610 11.972813 0.5729145
## Day_59-Day_14  8.6163504  -0.7773687 18.010070 0.0988058
## Day_69-Day_14  6.2973014  -3.0964178 15.691021 0.4519726
## Day_35-Day_26  3.8530412  -3.6048805 11.310963 0.7632327
## Day_43-Day_26 -3.2356697 -10.6935915  4.222252 0.8887275
## Day_59-Day_26  0.8342546  -8.5844147 10.252924 0.9999947
## Day_69-Day_26 -1.4847944 -10.9034638  7.933875 0.9997319
## Day_43-Day_35 -7.0887110 -15.2239104  1.046488 0.1391600
## Day_59-Day_35 -3.0187866 -12.9823304  6.944757 0.9834121
## Day_69-Day_35 -5.3378357 -15.3013795  4.625708 0.7279263
## Day_59-Day_43  4.0699244  -5.8936194 14.033468 0.9167609
## Day_69-Day_43  1.7508753  -8.2126685 11.714419 0.9994497
## Day_69-Day_59 -2.3190491 -13.8239584  9.185860 0.9986545

4.6. Alpha diversity - pre-weaning

# calculate Shannon and InvSimpson alpha diversity and add metadata
ad.df.pre <- data.frame("Description" = colnames(ps1.pre.otu))

ad.df.pre$Shannon <- estimate_richness(ps1.fcs.pre)$Shannon
## Warning in estimate_richness(ps1.fcs.pre): The data you have provided does not have
## any singletons. This is highly suspicious. Results of richness
## estimates (for example) are probably unreliable, or wrong, if you have already
## trimmed low-abundance taxa from the data.
## 
## We recommended that you find the un-trimmed data and retry.
ad.df.pre$InvSimpson <- estimate_richness(ps1.fcs.pre)$InvSimpson
## Warning in estimate_richness(ps1.fcs.pre): The data you have provided does not have
## any singletons. This is highly suspicious. Results of richness
## estimates (for example) are probably unreliable, or wrong, if you have already
## trimmed low-abundance taxa from the data.
## 
## We recommended that you find the un-trimmed data and retry.
ad.df.pre <- merge(meta(ps1.fcs.pre.r), ad.df.pre, by = "Description", all.x=T)

ad.df.pre <- ad.df.pre

4.7. LME pre-weaning faecal samples

# mixed model: random term and variance structure
lme.pre.pd1 <- lme(Shannon ~ Day_of_study_factor * Treatment,
              data = ad.df.pre, method = "REML",
              random = ~ 1 | Ear_tag)

lme.pre.pd2 <- lme(InvSimpson ~ Day_of_study_factor * Treatment,
              data = ad.df.pre, method = "REML",
              random = ~ 1 | Ear_tag)


# residual plots to visually inspect normality assumption
plot(resid(lme.pre.pd1, method= "pearson")~ad.df.pre$Day_of_study_factor); abline(0,0)

hist(resid(lme.pre.pd1, method= "pearson"), breaks=30, col="grey")

qqnorm(lme.pre.pd1, ∼ranef (.))

qqnorm(lme.pre.pd1)

plot(resid(lme.pre.pd2, method= "pearson")~ad.df.pre$Day_of_study_factor); abline(0,0)

hist(resid(lme.pre.pd2, method= "pearson"), breaks=30, col="grey")

qqnorm(lme.pre.pd2, ∼ranef (.))

qqnorm(lme.pre.pd2)

# model output
aov.pd.ls1 <- anova(lme.pre.pd1)
aov.pd.ls1
##                               numDF denDF   F-value p-value
## (Intercept)                       1   137 11366.862  <.0001
## Day_of_study_factor               3   137    34.338  <.0001
## Treatment                         1    46     4.561  0.0381
## Day_of_study_factor:Treatment     3   137     0.895  0.4454
aov.pd.ls2 <- anova(lme.pre.pd2)
aov.pd.ls2
##                               numDF denDF  F-value p-value
## (Intercept)                       1   137 883.6347  <.0001
## Day_of_study_factor               3   137  30.1488  <.0001
## Treatment                         1    46   2.8850  0.0962
## Day_of_study_factor:Treatment     3   137   0.2277  0.8770

4.8. Alpha diversity - jejunum digesta

# calculate Shannon and InvSimpson alpha diversity and add metadata
ad.df.jej <- data.frame("Description" = colnames(ps1.jej.otu)) 

ad.df.jej$Shannon <- estimate_richness(ps1.jej)$Shannon
## Warning in estimate_richness(ps1.jej): The data you have provided does not have
## any singletons. This is highly suspicious. Results of richness
## estimates (for example) are probably unreliable, or wrong, if you have already
## trimmed low-abundance taxa from the data.
## 
## We recommended that you find the un-trimmed data and retry.
ad.df.jej$InvSimpson <- estimate_richness(ps1.jej)$InvSimpson
## Warning in estimate_richness(ps1.jej): The data you have provided does not have
## any singletons. This is highly suspicious. Results of richness
## estimates (for example) are probably unreliable, or wrong, if you have already
## trimmed low-abundance taxa from the data.
## 
## We recommended that you find the un-trimmed data and retry.
ad.df.jej <- merge(meta(ps1.jej.r), ad.df.jej, by = "Description", all.x=T)

ad.df.jej <- ad.df.jej

4.9. LME jejunum luminal samples

# mixed model: random term and variance structure
lme.jej.pd1 <- lme(Shannon ~ Day_of_study_factor * Treatment,
              data = ad.df.jej, method = "REML",
              random = ~ 1 | Ear_tag)

lme.jej.pd2 <- lme(InvSimpson ~ Day_of_study_factor * Treatment,
              data = ad.df.jej, method = "REML",
              random = ~ 1 | Ear_tag)

# residual plots to visually inspect normality assumption
plot(resid(lme.jej.pd1, method= "pearson")~ad.df.jej$Day_of_study_factor); abline(0,0)

hist(resid(lme.jej.pd1, method= "pearson"), breaks=30, col="grey")

qqnorm(lme.jej.pd1, ∼ranef (.))

qqnorm(lme.jej.pd1)

plot(resid(lme.jej.pd2, method= "pearson")~ad.df.jej$Day_of_study_factor); abline(0,0)

hist(resid(lme.jej.pd2, method= "pearson"), breaks=30, col="grey")

qqnorm(lme.jej.pd2, ∼ranef (.))

qqnorm(lme.jej.pd2)

# model output
aov.pd.ls1 <- anova(lme.jej.pd1)
aov.pd.ls1
##                               numDF denDF   F-value p-value
## (Intercept)                       1    36 1324.7306  <.0001
## Day_of_study_factor               2    36   27.9231  <.0001
## Treatment                         1    36    2.0389  0.1619
## Day_of_study_factor:Treatment     2    36    0.2225  0.8016
aov.pd.ls2 <- anova(lme.jej.pd2)
aov.pd.ls2
##                               numDF denDF   F-value p-value
## (Intercept)                       1    36 254.33627  <.0001
## Day_of_study_factor               2    36  11.65518  0.0001
## Treatment                         1    36   1.39728  0.2449
## Day_of_study_factor:Treatment     2    36   0.26897  0.7657
# test differences between timepoints - Shannon diversity
res.aov2 <- aov(Shannon ~ Day_of_study_factor, data = ad.df.jej)
summary(res.aov2)
##                     Df Sum Sq Mean Sq F value   Pr(>F)    
## Day_of_study_factor  2 10.179   5.089    28.3 2.55e-08 ***
## Residuals           39  7.014   0.180                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(res.aov2, which = "Day_of_study_factor")
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Shannon ~ Day_of_study_factor, data = ad.df.jej)
## 
## $Day_of_study_factor
##                      diff        lwr        upr     p adj
## Day_44-Day_27 -1.09863471 -1.5087814 -0.6884880 0.0000003
## Day_70-Day_27  0.03951321 -0.3318251  0.4108515 0.9636645
## Day_70-Day_44  1.13814792  0.7334602  1.5428356 0.0000001
# test differences between timepoints - InvSimpson diversity
res.aov2 <- aov(InvSimpson ~ Day_of_study_factor, data = ad.df.jej)
summary(res.aov2)
##                     Df Sum Sq Mean Sq F value   Pr(>F)    
## Day_of_study_factor  2  160.6    80.3   11.98 8.78e-05 ***
## Residuals           39  261.4     6.7                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(res.aov2, which = "Day_of_study_factor")
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = InvSimpson ~ Day_of_study_factor, data = ad.df.jej)
## 
## $Day_of_study_factor
##                    diff       lwr       upr     p adj
## Day_44-Day_27 -3.621304 -6.124879 -1.117728 0.0030966
## Day_70-Day_27  1.262888 -1.003797  3.529574 0.3728454
## Day_70-Day_44  4.884192  2.413938  7.354446 0.0000651

4.10. Alpha diversity - ileum digesta

# calculate Shannon and InvSimpson alpha diversity and add metadata
ad.df.ile <- data.frame("Description" = colnames(ps1.ile.otu))

ad.df.ile$Shannon <- estimate_richness(ps1.ile)$Shannon
## Warning in estimate_richness(ps1.ile): The data you have provided does not have
## any singletons. This is highly suspicious. Results of richness
## estimates (for example) are probably unreliable, or wrong, if you have already
## trimmed low-abundance taxa from the data.
## 
## We recommended that you find the un-trimmed data and retry.
ad.df.ile$InvSimpson <- estimate_richness(ps1.ile)$InvSimpson
## Warning in estimate_richness(ps1.ile): The data you have provided does not have
## any singletons. This is highly suspicious. Results of richness
## estimates (for example) are probably unreliable, or wrong, if you have already
## trimmed low-abundance taxa from the data.
## 
## We recommended that you find the un-trimmed data and retry.
ad.df.ile <- merge(meta(ps1.ile.r), ad.df.ile, by = "Description", all.x=T)

ad.df.ile <- ad.df.ile

4.11. LME ileum luminal samples

# mixed model: random term and variance structure
lme.ile.pd1 <- lme(Shannon ~ Day_of_study_factor * Treatment,
              data = ad.df.ile, method = "REML",
              random = ~ 1 | Ear_tag)

lme.ile.pd2 <- lme(InvSimpson ~ Day_of_study_factor * Treatment,
              data = ad.df.ile, method = "REML",
              random = ~ 1 | Ear_tag)

# residual plots to visually inspect normality assumption
plot(resid(lme.ile.pd1, method= "pearson")~ad.df.ile$Day_of_study_factor); abline(0,0)

hist(resid(lme.ile.pd1, method= "pearson"), breaks=30, col="grey")

qqnorm(lme.ile.pd1, ∼ranef (.))

qqnorm(lme.ile.pd1)

plot(resid(lme.ile.pd2, method= "pearson")~ad.df.ile$Day_of_study_factor); abline(0,0)

hist(resid(lme.ile.pd2, method= "pearson"), breaks=30, col="grey")

qqnorm(lme.ile.pd2, ∼ranef (.))

qqnorm(lme.ile.pd2)

# model output
aov.pd.ls1 <- anova(lme.ile.pd1)
aov.pd.ls1
##                               numDF denDF   F-value p-value
## (Intercept)                       1    38 1237.8926  <.0001
## Day_of_study_factor               2    38   30.7933  <.0001
## Treatment                         1    38    0.9861  0.3270
## Day_of_study_factor:Treatment     2    38    0.7677  0.4711
aov.pd.ls2 <- anova(lme.ile.pd2)
aov.pd.ls2
##                               numDF denDF  F-value p-value
## (Intercept)                       1    38 451.2633  <.0001
## Day_of_study_factor               2    38  52.7914  <.0001
## Treatment                         1    38   3.8176  0.0581
## Day_of_study_factor:Treatment     2    38   2.3983  0.1045
# test differences between timepoints - Shannon diversity
res.aov2 <- aov(Shannon ~ Day_of_study_factor, data = ad.df.ile)
summary(res.aov2)
##                     Df Sum Sq Mean Sq F value   Pr(>F)    
## Day_of_study_factor  2  9.092   4.546   31.16 5.91e-09 ***
## Residuals           41  5.982   0.146                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(res.aov2, which = "Day_of_study_factor")
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Shannon ~ Day_of_study_factor, data = ad.df.ile)
## 
## $Day_of_study_factor
##                     diff        lwr        upr     p adj
## Day_44-Day_27 -1.0211376 -1.3549503 -0.6873249 0.0000000
## Day_70-Day_27 -0.8605586 -1.2125154 -0.5086018 0.0000015
## Day_70-Day_44  0.1605790 -0.1862337  0.5073917 0.5038774
# test differences between timepoints - InvSimpson diversity
res.aov2 <- aov(InvSimpson ~ Day_of_study_factor, data = ad.df.ile)
summary(res.aov2)
##                     Df Sum Sq Mean Sq F value   Pr(>F)    
## Day_of_study_factor  2  272.1  136.07   46.43 2.92e-11 ***
## Residuals           41  120.2    2.93                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(res.aov2, which = "Day_of_study_factor")
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = InvSimpson ~ Day_of_study_factor, data = ad.df.ile)
## 
## $Day_of_study_factor
##                    diff       lwr       upr   p adj
## Day_44-Day_27 -5.376613 -6.872672 -3.880554 0.00000
## Day_70-Day_27 -5.072451 -6.649827 -3.495075 0.00000
## Day_70-Day_44  0.304162 -1.250160  1.858484 0.88303

4.12. Alpha diversity - caecum digesta

# calculate Shannon and InvSimpson alpha diversity and add metadata
ad.df.cae <- data.frame("Description" = colnames(ps1.cae.otu))

ad.df.cae$Shannon <- estimate_richness(ps1.cae)$Shannon
## Warning in estimate_richness(ps1.cae): The data you have provided does not have
## any singletons. This is highly suspicious. Results of richness
## estimates (for example) are probably unreliable, or wrong, if you have already
## trimmed low-abundance taxa from the data.
## 
## We recommended that you find the un-trimmed data and retry.
ad.df.cae$InvSimpson <- estimate_richness(ps1.cae)$InvSimpson
## Warning in estimate_richness(ps1.cae): The data you have provided does not have
## any singletons. This is highly suspicious. Results of richness
## estimates (for example) are probably unreliable, or wrong, if you have already
## trimmed low-abundance taxa from the data.
## 
## We recommended that you find the un-trimmed data and retry.
ad.df.cae <- merge(meta(ps1.cae.r), ad.df.cae, by = "Description", all.x=T)

ad.df.cae <- ad.df.cae

4.13. LME caecum luminal samples

# mixed model: random term and variance structure
lme.cae.pd1 <- lme(Shannon ~ Day_of_study_factor * Treatment,
              data = ad.df.cae, method = "REML",
              random = ~ 1 | Ear_tag)

lme.cae.pd2 <- lme(InvSimpson ~ Day_of_study_factor * Treatment,
              data = ad.df.cae, method = "REML",
              random = ~ 1 | Ear_tag)


# residual plots to visually inspect normality assumption
plot(resid(lme.cae.pd1, method= "pearson")~ad.df.cae$Day_of_study_factor); abline(0,0)

hist(resid(lme.cae.pd1, method= "pearson"), breaks=30, col="grey")

qqnorm(lme.cae.pd1, ∼ranef (.))

qqnorm(lme.cae.pd1)

plot(resid(lme.cae.pd2, method= "pearson")~ad.df.cae$Day_of_study_factor); abline(0,0)

hist(resid(lme.cae.pd2, method= "pearson"), breaks=30, col="grey")

qqnorm(lme.cae.pd2, ∼ranef (.))

qqnorm(lme.cae.pd2)

# model output
aov.pd.ls1 <- anova(lme.cae.pd1)
aov.pd.ls1
##                               numDF denDF  F-value p-value
## (Intercept)                       1    42 6765.088  <.0001
## Day_of_study_factor               2    42   19.930  <.0001
## Treatment                         1    42    0.091  0.7648
## Day_of_study_factor:Treatment     2    42    1.534  0.2275
aov.pd.ls2 <- anova(lme.cae.pd2)
aov.pd.ls2
##                               numDF denDF  F-value p-value
## (Intercept)                       1    42 412.5819  <.0001
## Day_of_study_factor               2    42  21.9302  <.0001
## Treatment                         1    42   0.1404  0.7098
## Day_of_study_factor:Treatment     2    42   0.8890  0.4187
# test differences between timepoints - Shannon diversity
res.aov2 <- aov(Shannon ~ Day_of_study_factor, data = ad.df.cae)
summary(res.aov2)
##                     Df Sum Sq Mean Sq F value   Pr(>F)    
## Day_of_study_factor  2  4.122  2.0611   19.86 6.57e-07 ***
## Residuals           45  4.670  0.1038                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(res.aov2, which = "Day_of_study_factor")
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Shannon ~ Day_of_study_factor, data = ad.df.cae)
## 
## $Day_of_study_factor
##                     diff         lwr         upr     p adj
## Day_44-Day_27 -0.7176074 -0.99364948 -0.44156527 0.0000003
## Day_70-Day_27 -0.3741400 -0.65018207 -0.09809786 0.0055161
## Day_70-Day_44  0.3434674  0.06742531  0.61950952 0.0114964
# test differences between timepoints - InvSimpson diversity
res.aov2 <- aov(InvSimpson ~ Day_of_study_factor, data = ad.df.cae)
summary(res.aov2)
##                     Df Sum Sq Mean Sq F value   Pr(>F)    
## Day_of_study_factor  2   3175  1587.4   22.47 1.71e-07 ***
## Residuals           45   3179    70.6                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(res.aov2, which = "Day_of_study_factor")
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = InvSimpson ~ Day_of_study_factor, data = ad.df.cae)
## 
## $Day_of_study_factor
##                     diff        lwr        upr     p adj
## Day_44-Day_27 -19.865000 -27.067086 -12.662915 0.0000001
## Day_70-Day_27 -11.226777 -18.428862  -4.024691 0.0013187
## Day_70-Day_44   8.638223   1.436138  15.840309 0.0152966

5. Community composition analyses

5.1. Load data

ps1.glucan <- readRDS("./phyobjects/ps1.glucan.rds")
print(ps1.glucan)
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 2969 taxa and 421 samples ]
## sample_data() Sample Data:       [ 421 samples by 115 sample variables ]
## tax_table()   Taxonomy Table:    [ 2969 taxa by 6 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 2969 tips and 2968 internal nodes ]
ps1.glucan@sam_data$Day_of_study <- factor(ps1.glucan@sam_data$Day_of_study, levels = c("Day_4", "Day_8", "Day_14", "Day_26", "Day_27", "Day_35", "Day_43", "Day_44", "Day_59", "Day_69", "Day_70"))
ps1.glucan@sam_data$Pre_or_post_weaning <- factor(ps1.glucan@sam_data$Pre_or_post_weaning, levels = c("Pre-weaning", "Post-weaning"))
ps1.glucan@sam_data$Origin_Day <- factor(ps1.glucan@sam_data$Origin_Day, levels = c("faeces_Day_4", "faeces_Day_8", "faeces_Day_14", "faeces_Day_26", "faeces_Day_35", "faeces_Day_43", "faeces_Day_59", "faeces_Day_69", "jejunum_digesta_Day_27", "jejunum_digesta_Day_44", "jejunum_digesta_Day_70", "ileum_digesta_Day_27", "ileum_digesta_Day_44", "ileum_digesta_Day_70", "caecum_digesta_Day_27", "caecum_digesta_Day_44", "caecum_digesta_Day_70"))

5.2. Prepare data

ps1.subset <- subset_samples(ps1.glucan, Origin %in% c("faeces", "jejunum_digesta", "ileum_digesta", "caecum_digesta"))
ps1.subset <- prune_taxa(taxa_sums(otu_table(ps1.subset))>0, ps1.subset)

taxic <- as.data.frame(ps1.subset@tax_table)
taxic$OTU <- rownames(taxic)
tax_table(ps1.subset) <- tax_table(as.matrix(taxic))
ps1.subset@phy_tree <- NULL

ps1.fam <- microbiome::aggregate_top_taxa(ps1.subset, "Family", top = 25)
## Warning: 'microbiome::aggregate_top_taxa' is deprecated.
## Use 'aggregate_rare' instead.
## See help("Deprecated") and help("The microbiome::aggregate_top_taxa function is deprecated.-deprecated").
ps1.fam.r <- microbiome::transform(ps1.fam, "compositional")

5.3. Plot presets

getPalette = colorRampPalette(brewer.pal(12, "Paired"))

guide_italics <- guides(fill = guide_legend(label.theme = element_text(size = 15, 
    face = "italic", colour = "Black", angle = 0)))

5.4. Barplots

Barplot family level

pFam1 <- plot_composition(ps1.fam.r, x.label = "Treatment", 
                          otu.sort = "abundance",
                          group_by = "Origin_Day", average_by = "Origin_Day") +
  scale_fill_manual(values = getPalette(26)) + theme_bw() +
  theme(axis.text.x = element_text(angle = 90),
        legend.title = element_text(size=18),
        legend.position = "right") + 
  ggtitle("Relative abundance") + guide_italics
pFam1 

pdf(file = "./figures/Barplot_Family.pdf", height = 10, width = 15)
pFam1
dev.off() #The final figure can be found under panel B in Figure 3 of the manuscript
## png 
##   2

Barplot on Phylum level - prepare data

ps1.subset <- subset_samples(ps1.glucan, Origin %in% c("faeces", "jejunum_digesta", "ileum_digesta", "caecum_digesta"))
ps1.subset <- prune_taxa(taxa_sums(otu_table(ps1.subset))>0, ps1.subset)

taxic <- as.data.frame(ps1.subset@tax_table)
taxic$OTU <- rownames(taxic)
tax_table(ps1.subset) <- tax_table(as.matrix(taxic))
ps1.subset@phy_tree <- NULL

ps1.phyl <- microbiome::aggregate_top_taxa(ps1.subset, "Phylum", top = 10)
## Warning: 'microbiome::aggregate_top_taxa' is deprecated.
## Use 'aggregate_rare' instead.
## See help("Deprecated") and help("The microbiome::aggregate_top_taxa function is deprecated.-deprecated").
ps1.phyl.r <- microbiome::transform(ps1.phyl, "compositional")

Barplot Phylum level

pphyl1 <- plot_composition(ps1.phyl.r, sample.sort = "neatmap", x.label = "Treatment", otu.sort = "abundance", group_by = "Origin_Day", average_by = "Origin_Day") +
  scale_fill_manual(values = getPalette(11)) + theme_bw() +
  theme(axis.text.x = element_text(angle = 90),
        legend.title = element_text(size=18),
        legend.position = "right") + 
  ggtitle("Relative abundance") + guide_italics
pphyl1 

pdf(file = "./figures/Barplot_Phylum.pdf", height = 10, width = 12)
pphyl1 
dev.off() #The final figure can be found under panel A in Figure 3 of the manuscript
## png 
##   2

Barplot with Archaea - prepare data

ps1.glucan.r <- microbiome::transform(ps1.glucan, "compositional")

ps1.archaea <- subset_taxa(ps1.glucan.r, Phylum == "Euryarchaeota")
ps1.archaea
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 22 taxa and 421 samples ]
## sample_data() Sample Data:       [ 421 samples by 115 sample variables ]
## tax_table()   Taxonomy Table:    [ 22 taxa by 6 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 22 tips and 21 internal nodes ]
ps1.subset <- subset_samples(ps1.archaea, Origin %in% c("faeces", "jejunum_digesta", "ileum_digesta", "caecum_digesta"))
ps1.subset <- prune_taxa(taxa_sums(otu_table(ps1.subset))>0, ps1.subset)

taxic <- as.data.frame(ps1.subset@tax_table)
taxic$OTU <- rownames(taxic)
tax_table(ps1.subset) <- tax_table(as.matrix(taxic))
ps1.subset@phy_tree <- NULL

ps1.phyl <- microbiome::aggregate_top_taxa(ps1.subset, "Genus", top = 10)
## Warning: 'microbiome::aggregate_top_taxa' is deprecated.
## Use 'aggregate_rare' instead.
## See help("Deprecated") and help("The microbiome::aggregate_top_taxa function is deprecated.-deprecated").

Barplot with Archaea

pphyl1 <- plot_composition(ps1.phyl, x.label = "Treatment", otu.sort = "abundance", group_by = "Origin_Day", average_by = "Origin_Day") +
  scale_fill_manual(values = getPalette(5)) + theme_bw() +
  theme(axis.text.x = element_text(angle = 90),
        legend.title = element_text(size=18),
        legend.position = "right") + 
  ggtitle("Relative abundance") + guide_italics
pphyl1 

pdf(file = "./figures/Barplot_Genus_archaea.pdf", height = 10, width = 13)
pphyl1
dev.off() #The final figure can be found as Supplementary Figure S5 of the manuscript
## png 
##   2
data <- pphyl1$data #data object generated to investigate abundances of Archaea

6. Beta-diversity analysis

6.1. Input data PCoA plot

Input data Principal Coordinates Analysis plot

ps1 <- readRDS("./phyobjects/ps1.glucan.rds")

ps1@sam_data$Day_of_study_factor <- factor(ps1@sam_data$Day_of_study_factor, levels = c("Day_4", "Day_8", "Day_14", "Day_26", "Day_27", "Day_35", "Day_43", "Day_44", "Day_59", "Day_69", "Day_70"))
ps1@sam_data$Pre_or_post_weaning <- factor(ps1@sam_data$Pre_or_post_weaning, levels = c("Pre_weaning", "Post_weaning"))

ps1 <- subset_samples(ps1, Origin %in% c("faeces")) 
ps1 <- prune_taxa(taxa_sums(ps1) > 0, ps1)
ps1 <- microbiome::transform(ps1, "compositional")

6.2. Principal Coordinates Analysis plot

Ordination plot based on Weighted Unifrac

set.seed(49275)
ordu.wt.uni = ordinate(ps1, "PCoA", "unifrac", weighted=TRUE)

wt.unifrac <- plot_ordination(ps1, ordu.wt.uni, color="Day_of_study_factor", shape="Treatment")
wt.unifrac <- wt.unifrac + scale_color_viridis(discrete = TRUE, option = "C")+ scale_fill_viridis(discrete = TRUE) +
ggtitle("Ordination plot - PCoA, Weighted UniFrac faecal samples") + geom_point(size = 3) + scale_shape_manual(values=c(15,17))
wt.unifrac <- wt.unifrac + 
  stat_ellipse(type = "norm", linetype = 5) +
  theme_bw()
print(wt.unifrac)

pdf(file = "./figures/PCoA_wt.unifrac.pdf", height = 10, width = 15)
wt.unifrac
dev.off() #The final figure can be found as Figure 4 of the manuscript
## png 
##   2

6.3. Load beta-diversity data

ps1 <- readRDS("./phyobjects/ps1.glucan.rds")

ps1@sam_data$Day_of_study_factor <- factor(ps1@sam_data$Day_of_study_factor, levels = c("Day_4", "Day_8", "Day_14", "Day_26", "Day_27", "Day_35", "Day_43", "Day_44", "Day_59", "Day_69", "Day_70"))
ps1@sam_data$Pre_or_post_weaning <- factor(ps1@sam_data$Pre_or_post_weaning, levels = c("Pre_weaning", "Post_weaning"))

# subsetting
ps1.fcs <- subset_samples(ps1, Origin %in% c("faeces"))
ps1.fcs.pre <- subset_samples(ps1.fcs, Pre_or_post_weaning %in% c("Pre_weaning"))

ps1.fcs.r <- microbiome::transform(ps1.fcs, "compositional")
ps1.fcs.pre.r <- microbiome::transform(ps1.fcs.pre, "compositional")

# Conversions of data for Adonis tests
fcs.otu <- abundances(ps1.fcs.r)
fcs.meta <- meta(ps1.fcs.r)

fcs.otu.pre <- abundances(ps1.fcs.pre.r)
fcs.meta.pre <- meta(ps1.fcs.pre.r)

6.4. Visualize microbiome variation

Visualize the population density and highlight sample groups:

p <- plot_landscape(ps1.fcs.r, method = "NMDS", distance = "bray", col = "Day_of_study_factor", size =3)
print(p)

p <- plot_landscape(ps1.fcs.r, method = "NMDS", distance = "bray", col = "Treatment", size =3)
print(p)

p <- plot_landscape(ps1.fcs.pre.r, method = "NMDS", distance = "bray", col = "Day_of_study_factor", size =3)
print(p)

p <- plot_landscape(ps1.fcs.pre.r, method = "NMDS", distance = "bray", col = "Treatment", size =3)
print(p)

6.5. PERMANOVA significance test

PERMANOVA significance test for group-level differences

set.seed(343)
permanova <- adonis(t(fcs.otu) ~ Treatment + Day_of_study_factor,
                    data = fcs.meta, permutations=9999, method = "bray")

print(as.data.frame(permanova$aov.tab)) #no differences in beta-diversity between treatment groups
##                      Df   SumsOfSqs   MeanSqs   F.Model          R2 Pr(>F)
## Treatment             1   0.3128949 0.3128949  1.329623 0.003094676 0.1493
## Day_of_study_factor   7  35.3739595 5.0534228 21.474141 0.349864949 0.0001
## Residuals           278  65.4206145 0.2353260        NA 0.647040375     NA
## Total               286 101.1074689        NA        NA 1.000000000     NA
set.seed(456)
permanova.pre <- adonis(t(fcs.otu.pre) ~ Treatment + Day_of_study_factor,
                    data = fcs.meta.pre, permutations=9999, method = "bray")

print(as.data.frame(permanova.pre$aov.tab)) #differences in beta-diversity between treatment groups
##                      Df  SumsOfSqs   MeanSqs   F.Model          R2 Pr(>F)
## Treatment             1  0.4374209 0.4374209  1.711205 0.006996515 0.0411
## Day_of_study_factor   3 14.5368011 4.8456004 18.956153 0.232515071 0.0001
## Residuals           186 47.5456012 0.2556215        NA 0.760488414     NA
## Total               190 62.5198232        NA        NA 1.000000000     NA

6.6. Checking homogeneity assumption

# all faecal samples - dispersion between treatment groups
dist <- vegdist(t(fcs.otu), method="bray")

anova(betadisper(dist, fcs.meta$Treatment)) #no statistical significant differences in dispersion between treatment groups
## Analysis of Variance Table
## 
## Response: Distances
##            Df  Sum Sq    Mean Sq F value Pr(>F)
## Groups      1 0.00054 0.00054152  0.2655 0.6068
## Residuals 285 0.58138 0.00203994
dispersion <- betadisper(dist, group= fcs.meta$Treatment)

plot(dispersion, hull=FALSE, ellipse = TRUE)

# dispersion pre- vs post-weaning
anova(betadisper(dist, fcs.meta$Pre_or_post_weaning)) #statistical significant differences in dispersion between pre- and post-weaning samples
## Analysis of Variance Table
## 
## Response: Distances
##            Df  Sum Sq Mean Sq F value    Pr(>F)    
## Groups      1 0.81987 0.81987  199.01 < 2.2e-16 ***
## Residuals 285 1.17414 0.00412                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
dispersion_weaning <- betadisper(dist, group= fcs.meta$Pre_or_post_weaning) 

plot(dispersion_weaning, hull=FALSE, ellipse = TRUE)

# dispersion in each time-point
anova(betadisper(dist, fcs.meta$Day_of_study_factor)) #statistical significant differences in dispersion between time points
## Analysis of Variance Table
## 
## Response: Distances
##            Df  Sum Sq  Mean Sq F value    Pr(>F)    
## Groups      7 0.42241 0.060344  9.8852 5.101e-11 ***
## Residuals 279 1.70315 0.006104                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
dispersion_day <- betadisper(dist, group = fcs.meta$Day_of_study_factor)

plot(dispersion_day, hull=FALSE, ellipse = TRUE)

# pre-weaning faecal samples - dispersion between treatment groups
dist.pre <- vegdist(t(fcs.otu.pre), method="bray")

anova(betadisper(dist.pre, fcs.meta.pre$Treatment)) #no statistical significant differences in dispersion between groups in pre-weaning faecal samples
## Analysis of Variance Table
## 
## Response: Distances
##            Df  Sum Sq    Mean Sq F value Pr(>F)
## Groups      1 0.00087 0.00086817  0.2949 0.5878
## Residuals 189 0.55648 0.00294433
dispersion.pre <- betadisper(dist.pre, group= fcs.meta.pre$Treatment)

plot(dispersion.pre, hull=FALSE, ellipse = TRUE)

# pre-weaning faecal samples - dispersion in each time-point
anova(betadisper(dist.pre, fcs.meta.pre$Day_of_study_factor)) #no statistical significant differences in dispersion between timepoints in pre-weaning faecal samples
## Analysis of Variance Table
## 
## Response: Distances
##            Df  Sum Sq  Mean Sq F value Pr(>F)
## Groups      3 0.03017 0.010056  1.9034 0.1305
## Residuals 187 0.98792 0.005283
dispersion.pre_day <- betadisper(dist.pre, group = fcs.meta.pre$Day_of_study_factor)

plot(dispersion.pre_day, hull=FALSE, ellipse = TRUE)

7. Abundance testing

7.1. Load data

ps1.glucan <- readRDS("./phyobjects/ps1.glucan.rds")

ps1.glucan@sam_data$Day_of_study_factor <- factor(ps1.glucan@sam_data$Day_of_study_factor, levels = c("Day_4", "Day_8", "Day_14", "Day_26", "Day_27", "Day_35", "Day_43", "Day_44", "Day_59", "Day_69", "Day_70"))

ps1.glucan.g <- tax_glom(ps1.glucan, "Genus")
ps1.glucan.g.r <- microbiome::transform(ps1.glucan.g, "compositional")

ps1.glucan.g.r <- subset_samples(ps1.glucan.g.r, Origin %in% c("faeces"))
ps1.glucan.g.r <- subset_samples(ps1.glucan.g.r, Pre_or_post_weaning %in% c("Pre_weaning"))

ps1.glucan.g.r <- filter_taxa(ps1.glucan.g.r, function(x) sum(x > 0.001) > (0.50*length(x)), TRUE)
ps1.glucan.p <- prune_taxa(taxa_sums(otu_table(ps1.glucan.g.r)) > 0, ps1.glucan.g.r)

# tax table with OTU column and best_hit
glucan.tax <- as.data.frame(tax_table(ps1.glucan.g.r))
glucan.tax$OTU <- rownames(glucan.tax)
tax_table(ps1.glucan.g.r) <- tax_table(as.matrix(glucan.tax))
ps1.glucan.g.bh <- format_to_besthit(ps1.glucan.g.r)
glucan.tax.bh <- as.data.frame(tax_table(ps1.glucan.g.bh))
colnames(glucan.tax.bh)[7] <- "OTU"

7.2. Prepare data

glucan.g.met <- meta(ps1.glucan.p)

glucan.g1 <- glucan.g.met[,c("Description","Ear_tag", "department_pre_weaning",
               "Treatment","Day_of_study_factor")]
# OTU table as dataframe:
glucan.g2 <- as.data.frame(t(otu_table(ps1.glucan.p)))
glucan.g2$Description <- rownames(glucan.g2)
# merge files
glucan.g3 <- merge(glucan.g1, glucan.g2, by = "Description")
colnames(glucan.g3)[6:36] <- paste("k__", sep = "", colnames(glucan.g3)[6:36])

7.3. Abundance testing

Abundance testing on pre-weaning faecal samples

# Results, warnings and messages have been inactivated for this markdown, as otherwise it will use too much space in the HTML output file.
tc.treatment <- taxa.compare(taxtab = glucan.g3, propmed.rel = "gamlss",
                        transform = "none", comvar = "Treatment",
                        adjustvar = c("Day_of_study_factor", "department_pre_weaning"),
                        personid = "Ear_tag", longitudinal = "yes",
                        p.adjust.method = "fdr")

tc.glucan05 <- subset(tc.treatment, pval.adjust.TreatmentY_glucan < .05)

tc.glucan05$id <- droplevels(as.factor(tc.glucan05$id))

glucan3.m <- reshape2::melt(glucan.g3)
glucan3.max <- ddply(glucan3.m, .(variable), summarise, max = max(value))
glucan3.max1 <- glucan3.max
glucan3.max1$variable <- droplevels(glucan3.max1$variable)
 
# subset
glucan.g4 <- subset(glucan3.m, variable %in% tc.glucan05$id &
                variable %in% glucan3.max1$variable)

# create vector of genus names (OTU codes) to include in plots
select.gen.glucan <- levels(droplevels(glucan.g4$variable)) # 27 genera
select.gen.glucan <- sub("k__", "", select.gen.glucan) # remove "k__"
select.gen.glucan

7.4. Editing output

tc.glucan.sub <- subset(tc.glucan05, id %in% glucan3.max1$variable)

# remove "k__" in id names
tc.glucan.sub$id <- as.factor(sub("k__", "", tc.glucan.sub$id))

# show tax names of output genera
glucan.g4.tax <- subset(glucan.tax.bh, OTU %in% select.gen.glucan) #significant genera

# create vectors of significant genera from comparisons
select.gen.glucan2 <- unique(c(as.character(select.gen.glucan)))

7.5. Prepare data for plotting

# filter for genera from first output series of GAMLSS tests (select.gen.glucan2).

# object 1: metadata
glucan.tr.met <- meta(ps1.glucan.g.r)

glucan.tr1 <- glucan.tr.met[,c("Description","Ear_tag", "department_pre_weaning",
               "Treatment","Day_of_study_factor")]

# object 2: OTU table
glucan.tr2 <- as.data.frame(t(otu_table(ps1.glucan.g.r)))
glucan.tr2$Description <- rownames(glucan.tr2)
# melt glucan.tr2
glucan.tr2.m <- reshape2::melt(glucan.tr2)
## Using Description as id variables
colnames(glucan.tr2.m) <- c("Description", "OTU", "abund")
# merge glucan.tr2, glucan.tax and glucan.tr1
glucan.tr3a <- base::merge(glucan.tr2.m, glucan.tax, by = "OTU")
glucan.tr3 <- base::merge(glucan.tr1, glucan.tr3a, by = "Description")

# subset to signif genera and max(abund) > 0.1
glucan.tr3.max <- ddply(glucan.tr3, .(OTU), summarise, max = max(abund))
glucan.tr3.max <- subset(glucan.tr3.max, max > .1)
glucan.tr3.max$OTU <- droplevels(glucan.tr3.max$OTU)
glucan.tr3.s <- subset(glucan.tr3, OTU %in% select.gen.glucan2 & OTU %in%
                     glucan.tr3.max$OTU)
glucan.tr3.s$OTU <- droplevels(glucan.tr3.s$OTU)

7.6. Plot presets

theme4 <- theme_classic() + 
  theme(panel.grid.major = element_line(colour = "grey80"),
        panel.spacing = unit(.5,"lines"),
        panel.border = element_rect(color = "black", fill = NA, size = .5),
        strip.background = element_blank(),
        strip.placement = "outside",
        text = element_text(size=15),
        axis.text.x = element_text(hjust = .5, vjust = .5),
        plot.title = element_text(size = 12))
labs_abd <- as_labeller(c(
  "faeces" = "Faeces", "ileum_digesta" = "Ileum digesta", "jejunum_digesta" = " Jejunum digesta"))

7.7. Plotting loop

# for-loop
sig.plot <- list()
for(i in 1:nlevels(glucan.tr3.s$OTU)){
  a = levels(glucan.tr3.s$OTU)[i]
  B = glucan.tr3.s[glucan.tr3.s$OTU == a,]
  p = ggplot(B, aes(x = Day_of_study_factor, y = abund, colour = Treatment, alpha = 0.95)) +
    geom_boxplot(outlier.size = 0) +
    geom_point(size = 2, position = position_jitterdodge()) +
    labs(x="time (d)", y="rel. abundance") +
    scale_y_continuous(limits = c(0,1), n.breaks = 6) +
    ggtitle(paste(B$Order,B$Family,B$Genus,B$OTU)) + theme4
  sig.plot[[i]] = p
}

# print plots
pdf("./figures/Differential_abundant_genera.pdf")
for (i in 1:nlevels(glucan.tr3.s$OTU)) {
    print(sig.plot[[i]])
}
dev.off() #Resulting figures can be found under Figure 5 of the manuscript
## png 
##   2

8. Distance-based PRC

8.1. Load data and subset

# only faeces pre-weaning
fcs.pre <- readRDS("./phyobjects/ps1.faeces.rds")
fcs.pre <- subset_samples(fcs.pre, Pre_or_post_weaning == "Pre_weaning")
fcs.pre <- prune_taxa(taxa_sums(otu_table(fcs.pre)) > 0, fcs.pre)

# only faeces post-weaning
fcs.post <- readRDS("./phyobjects/ps1.faeces.rds")
fcs.post <- subset_samples(fcs.post, Pre_or_post_weaning == "Post_weaning")
fcs.post <- prune_taxa(taxa_sums(otu_table(fcs.post)) > 0, fcs.post)

print(fcs.pre)
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 1640 taxa and 191 samples ]
## sample_data() Sample Data:       [ 191 samples by 115 sample variables ]
## tax_table()   Taxonomy Table:    [ 1640 taxa by 6 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 1640 tips and 1639 internal nodes ]
print(fcs.post)
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 1159 taxa and 96 samples ]
## sample_data() Sample Data:       [ 96 samples by 115 sample variables ]
## tax_table()   Taxonomy Table:    [ 1159 taxa by 6 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 1159 tips and 1158 internal nodes ]
# transform to relative abundance
fcs.pre.r <- microbiome::transform(fcs.pre, "compositional")
fcs.post.r <- microbiome::transform(fcs.post, "compositional")

8.2. PRC genus level - pre-weaning

rank_names(fcs.pre.r)
## [1] "Domain" "Phylum" "Class"  "Order"  "Family" "Genus"
pre.gen.r <- tax_glom(fcs.pre.r, taxrank = rank_names(fcs.pre.r) [6]) #Aggregate to genus level

# Add Family level names
taxa_names(pre.gen.r) <- interaction(rownames(pre.gen.r@tax_table[,c("Family")]),pre.gen.r@tax_table[,c("Family")])
# Add Genus level names
taxa_names(pre.gen.r) <- interaction(rownames(pre.gen.r@tax_table[,c("Genus")]),pre.gen.r@tax_table[,c("Genus")])

# Genus level
df.pre.gen.r <- as.data.frame(t(abundances(pre.gen.r)))
df.pre.gen.r$Description <- rownames(df.pre.gen.r)
dfm.pre.gen.r <- merge(meta(pre.gen.r)[c("Description","Treatment", "Day_of_study")], df.pre.gen.r, by="Description")

response<-dfm.pre.gen.r[,4:length(dfm.pre.gen.r)]
time<-as.factor(dfm.pre.gen.r[,3])
treatment<-as.factor(dfm.pre.gen.r[,2])

# Set up objects and matrices
y <- deparse(substitute(response))
x <- deparse(substitute(treatment))
z <- deparse(substitute(time))

# Create formulas and design matrices
fla <- as.formula(paste("~", x, "+", z))
mf <- model.frame(fla, response, na.action = na.pass)
fla.zx <- as.formula(paste("~", z, ":", x))
fla.z <- as.formula(paste("~", z))
X = model.matrix(fla.zx, mf)[, -c(seq_len(nlevels(time) + 1))]
Z = model.matrix(fla.z, mf)[, -1]

# Calculate Weighted UniFrac distance matrix
dm.pre.gen.r.wUF <- UniFrac(pre.gen.r, weighted = T)

# Calculation of UniFrac distance-based RDA
dbRDA.pre <- capscale(dm.pre.gen.r.wUF ~ X + Condition(Z), data = dfm.pre.gen.r[,2:3], comm = response)

# Conversion of dbRDA to PRC compatible format
dbRDA.pre$terminfo$xlev = list(levels(time), levels(treatment))
names(dbRDA.pre$terminfo$xlev) = c(paste(z), paste(x))
dbRDA.pre$call <- match.call()
class(dbRDA.pre) <- c("prc", class(dbRDA.pre))

sum_dbprc <- summary(dbRDA.pre)
plot(dbRDA.pre, main = "dbPRC faecal samples w_unifrac genus level", select = abs(sum_dbprc$sp) > 0.2)

pdf(file = "./figures/prc_pre-weaning.pdf", height = 5, width = 8)
plot(dbRDA.pre, main = "dbPRC faecal samples w_unifrac genus level", select = abs(sum_dbprc$sp) > 0.2)
dev.off() #The final figure can be found under panel A in Supplementary Figure S4 of the manuscript
## png 
##   2

8.3. PRC genus level - post-weaning

post.gen.r <- tax_glom(fcs.post.r, taxrank = rank_names(fcs.post.r) [6])

rank_names(post.gen.r)
## [1] "Domain" "Phylum" "Class"  "Order"  "Family" "Genus"
# Add Family level names
taxa_names(post.gen.r) <- interaction(rownames(post.gen.r@tax_table[,c("Family")]),post.gen.r@tax_table[,c("Family")])
# Add Genus level names
taxa_names(post.gen.r) <- interaction(rownames(post.gen.r@tax_table[,c("Genus")]),post.gen.r@tax_table[,c("Genus")])

df.post.gen.r <- as.data.frame(t(abundances(post.gen.r)))
df.post.gen.r$Description <- rownames(df.post.gen.r)
dfm.post.gen.r <- merge(meta(post.gen.r)[c("Description","Treatment", "Day_of_study")], df.post.gen.r, by="Description")

response<-dfm.post.gen.r[,4:length(dfm.post.gen.r)]
time<-as.factor(dfm.post.gen.r[,3])
treatment<-as.factor(dfm.post.gen.r[,2])

# Set up objects and matrices
y <- deparse(substitute(response))
x <- deparse(substitute(treatment))
z <- deparse(substitute(time))

# Create formulas and design matrices
fla <- as.formula(paste("~", x, "+", z))
mf <- model.frame(fla, response, na.action = na.pass)
fla.zx <- as.formula(paste("~", z, ":", x))
fla.z <- as.formula(paste("~", z))
X = model.matrix(fla.zx, mf)[, -c(seq_len(nlevels(time) + 1))]
Z = model.matrix(fla.z, mf)[, -1]

# Calculate Weighted UniFrac distance matrix
dm.post.gen.r.wUF <- UniFrac(post.gen.r, weighted = T)

# Calculation of UniFrac distance based RDA
dbRDA.CFgen <- capscale(dm.post.gen.r.wUF ~ X + Condition(Z), data = dfm.post.gen.r[,2:3], comm = response)

# Conversion of dbRDA to PRC compatible format
dbRDA.CFgen$terminfo$xlev = list(levels(time), levels(treatment))
names(dbRDA.CFgen$terminfo$xlev) = c(paste(z), paste(x))
dbRDA.CFgen$call <- match.call()
class(dbRDA.CFgen) <- c("prc", class(dbRDA.CFgen))

sum_dbprc <- summary(dbRDA.CFgen)
plot(dbRDA.CFgen, main = "dbPRC faecal samples w_unifrac genus level", select = abs(sum_dbprc$sp) > 0.2) 

pdf(file = "./figures/prc_post-weaning.pdf", height = 5, width = 8)
plot(dbRDA.CFgen, main = "dbPRC faecal samples w_unifrac genus level", select = abs(sum_dbprc$sp) > 0.2)
dev.off() #The final figure can be found under panel B in Supplementary Figure S4 of the manuscript
## png 
##   2

9. Heatmap

9.1. Input data - top 35 families

ps1 <- readRDS("./phyobjects/ps1.glucan.rds")

ps1@sam_data$Day_of_study_factor <- factor(ps1@sam_data$Day_of_study_factor, levels = c("Day_4", "Day_8", "Day_14", "Day_26", "Day_27", "Day_35", "Day_43", "Day_44", "Day_59", "Day_69", "Day_70"))
ps1@sam_data$Pre_or_post_weaning <- factor(ps1@sam_data$Pre_or_post_weaning, levels = c("Pre_weaning", "Post_weaning"))

ps1.fcs <- subset_samples(ps1, Origin %in% c("faeces"))
ps1.fcs.fam <- microbiome::aggregate_taxa(ps1.fcs, "Family")
# remove taxa with 0 abundance
ps1.fcs.fam <- prune_taxa(taxa_sums(otu_table(ps1.fcs.fam)) > 0, ps1.fcs.fam)
# add FAM column
ps1.fcs.fam.tax <- as.data.frame(ps1.fcs.fam@tax_table)
ps1.fcs.fam.tax$FAM <- rownames(ps1.fcs.fam.tax)
tax_table(ps1.fcs.fam) <- tax_table(as.matrix(ps1.fcs.fam.tax))
# relative abundance
ps1.fcs.fam.r <- microbiome::transform(ps1.fcs.fam, "compositional")

# select top 35 by summed rel. abund.
ps1.fcs.fam.abund <- abundances(ps1.fcs.fam.r)
ps1.fcs.fam.abund.m <- reshape2::melt(ps1.fcs.fam.abund)
colnames(ps1.fcs.fam.abund.m) <- c("FAM", "Description", "rel_abund")
ps1.fcs.fam.sum <- ddply(ps1.fcs.fam.abund.m, .(FAM), summarise, mean = mean(rel_abund))

ps1.fcs.fam.tax$reads <- taxa_sums(ps1.fcs.fam) # total reads per genus
ps1.fcs.fam.tax$sum_abund <- taxa_sums(ps1.fcs.fam.r) # summed relative abund.
ps1.fcs.fam.tax$mean_abund <- ps1.fcs.fam.sum$mean # mean relative abundance

# top 35
ps1.fcs.fam.top35 <- top_n(ps1.fcs.fam.tax, n = 35, wt = sum_abund)
ps1.fcs.fam.top35$FAM <- as.factor(ps1.fcs.fam.top35$FAM)

# top 35 with phy_tree
ps1.fcs.fam35.r <- subset_taxa(ps1.fcs.fam.r, FAM %in% ps1.fcs.fam.top35$FAM)

9.2. Plot presets

theme_hm <- theme_classic() + theme(
  axis.text.y = element_text(colour = 'black', face = 'italic'),
  legend.key = element_blank(), text = element_text(size = 20),
  panel.spacing.x=unit(0, "lines"),
  strip.background = element_rect(colour="black", fill="white"))

9.3. Create heatmap plot data

# create heatmap plot data
fam35.hm.df <- plot_heatmap(ps1.fcs.fam35.r, method = "CAP", distance = "bray",
                     formula = ~ Day_of_study_factor)$data

# clean up tax names
fam35.hm.df$taxname <- ifelse(fam35.hm.df$Family == "f__", 
              yes = paste(fam35.hm.df$Class, "(unassigned)"),
              no = paste(fam35.hm.df$Family))
fam35.hm.df$taxname <- gsub(fam35.hm.df$taxname, pattern = "[a-z]__",
                          replacement = "")

# summarise per pen per timepoint
fam35.hm.sum <- ddply(fam35.hm.df, .(Day_of_study_factor, Origin, Pen_no_pre_weaning,
                               taxname, FAM),
                   summarise, median = median(Abundance),
                   mean = mean(Abundance))
fam35.hm.sum$group <- interaction(fam35.hm.sum$Origin, 
                               fam35.hm.sum$Day_of_study_factor, fam35.hm.sum$Pen_no_pre_weaning,
                               drop=T)
fam35.hm.sum$taxname <- as.factor(fam35.hm.sum$taxname)

# Bray-curtis distance matrix for taxa clustering
fam35.hm.cast <- reshape2::dcast(fam35.hm.sum[,c(4,6,8)], taxname ~ group,
                              value.var = "median")
rownames(fam35.hm.cast) <- fam35.hm.cast[,1]
fam35.hm.mat <- as.matrix(fam35.hm.cast[,c(2:129)])
bray35.sp <- vegdist(fam35.hm.mat, method = "bray")

# reorder levels by clustering
sp.order <- hclust(bray35.sp)$order
fam35.hm.sum$taxname <- factor(fam35.hm.sum$taxname,
                            levels(fam35.hm.sum$taxname)[sp.order])
levels(fam35.hm.sum$taxname)
##  [1] "Akkermansiaceae"               "Corynebacteriaceae"           
##  [3] "Desulfovibrionaceae"           "Tannerellaceae"               
##  [5] "p-2534-18B5_gut_group"         "Pirellulaceae"                
##  [7] "Synergistaceae"                "Clostridiales_vadinBB60_group"
##  [9] "Marinifilaceae"                "Atopobiaceae"                 
## [11] "Eggerthellaceae"               "Streptococcaceae"             
## [13] "Succinivibrionaceae"           "Lactobacillaceae"             
## [15] "Prevotellaceae"                "Lachnospiraceae"              
## [17] "Ruminococcaceae"               "Muribaculaceae"               
## [19] "Veillonellaceae"               "Christensenellaceae"          
## [21] "Methanobacteriaceae"           "Clostridiaceae_1"             
## [23] "Peptostreptococcaceae"         "Spirochaetaceae"              
## [25] "Acidaminococcaceae"            "Erysipelotrichaceae"          
## [27] "Family_XIII"                   "Rikenellaceae"                
## [29] "Enterobacteriaceae"            "Bacteroidaceae"               
## [31] "Fusobacteriaceae"              "Enterococcaceae"              
## [33] "Campylobacteraceae"            "Actinomycetaceae"             
## [35] "Pasteurellaceae"

9.4. Plot heatmap

With Bray-Curtis complete-linkage clustering of taxa.

theme_hm <- theme_classic() + theme(
  axis.text.y = element_text(colour = 'black', face = 'italic'),
  axis.text.x = element_text(size = 10, colour = 'white', face = 'italic'),
  legend.key = element_blank(), text = element_text(size = 20),
  panel.spacing.x=unit(0, "lines"),
  strip.background = element_rect(colour="black", fill="white"))

p.fam35.hm <- ggplot(fam35.hm.sum, aes(x=Pen_no_pre_weaning, y=taxname)) +
  geom_tile(aes(fill=mean)) + 
  scale_fill_viridis("Rel. abundance", option = "E",
                     na.value = "black", trans="log10") +
  facet_grid(~Day_of_study_factor, scales = "free") +
  labs(x="Means per pen",y=NULL) +
  scale_y_discrete(position = "right") +
  theme_hm
p.fam35.hm
## Warning: Transformation introduced infinite values in discrete y-axis

pdf(file = "./figures/Heatmap_families.pdf", height = 9, width = 35)
p.fam35.hm
## Warning: Transformation introduced infinite values in discrete y-axis
dev.off() #The final figure can be found as Supplementary Figure S6 of the manuscript
## png 
##   2

10. Immunological analyses

Figures were generated using code below, and were then manually exported. This is why they are not showing up in the ‘figures’ folder. The unprocessed figures can be found under their individual folders.

10.1. Flow cytometry - t-test

# Rotate input file from folder 'input_data_flow_cytometry' to perform the analysis for other immunological parameters:
my_data <- read.table("./input_data_flow_cytometry/Flow_cytometry_DC_MLN_CDC1_CD8086_BG.csv",
                      header = TRUE,
                      sep = ",",
                      stringsAsFactors = FALSE)

my_data$id <- as.factor(my_data$id)

my_data$group <- as.factor(my_data$group)

my_data$time <- as.factor(my_data$time)

# Load the data
data("genderweight", package = "datarium")

# Show a sample of the data by group
set.seed(123)
my_data %>% sample_n_by(group, size = 2)
## # A tibble: 4 x 4
##   id    group    time  score
##   <fct> <fct>    <fct> <dbl>
## 1 7     Control  t2    305  
## 2 3     Control  t3    157  
## 3 6     Y_glucan t2    191  
## 4 3     Y_glucan t1     80.9
my_data %>%
  group_by(group) %>%
  get_summary_stats(score, type = "mean_sd")
## # A tibble: 2 x 5
##   group    variable     n  mean    sd
##   <fct>    <chr>    <dbl> <dbl> <dbl>
## 1 Control  score       24  132.  72.1
## 2 Y_glucan score       23  146.  74.2
# Save the data in two different vector
Y_glucan <- my_data %>%
  filter(group == "Y_glucan") %>%
  pull(score)
Control <- my_data %>%
  filter(group == "Control") %>%
  pull(score)

# Compute t-test
res <- t.test(Y_glucan, Control)
res
## 
##  Welch Two Sample t-test
## 
## data:  Y_glucan and Control
## t = 0.63763, df = 44.771, p-value = 0.527
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -29.39358  56.62040
## sample estimates:
## mean of x mean of y 
##  146.0217  132.4083
# Levene
levene_test(score ~ group, data = my_data)
## # A tibble: 1 x 4
##     df1   df2 statistic     p
##   <int> <int>     <dbl> <dbl>
## 1     1    45    0.0910 0.764
# Compute t-test
res <- t.test(score ~ group, data = my_data)
res
## 
##  Welch Two Sample t-test
## 
## data:  score by group
## t = -0.63763, df = 44.771, p-value = 0.527
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -56.62040  29.39358
## sample estimates:
##  mean in group Control mean in group Y_glucan 
##               132.4083               146.0217
stat.test <- my_data %>% 
  t_test(score ~ group) %>%
  add_significance()
stat.test
## # A tibble: 1 x 9
##   .y.   group1  group2      n1    n2 statistic    df     p p.signif
##   <chr> <chr>   <chr>    <int> <int>     <dbl> <dbl> <dbl> <chr>   
## 1 score Control Y_glucan    24    24    -0.638  44.8 0.527 ns
my_data %>%
  t_test(score ~ group, detailed = TRUE) %>%
  add_significance()
## # A tibble: 1 x 16
##   estimate estimate1 estimate2 .y.   group1 group2    n1    n2 statistic     p
##      <dbl>     <dbl>     <dbl> <chr> <chr>  <chr>  <int> <int>     <dbl> <dbl>
## 1    -13.6      132.      146. score Contr~ Y_glu~    24    24    -0.638 0.527
## # ... with 6 more variables: df <dbl>, conf.low <dbl>, conf.high <dbl>,
## #   method <chr>, alternative <chr>, p.signif <chr>
my_data %>% cohens_d(score ~ group, var.equal = FALSE)
## # A tibble: 1 x 7
##   .y.   group1  group2   effsize    n1    n2 magnitude 
## * <chr> <chr>   <chr>      <dbl> <int> <int> <ord>     
## 1 score Control Y_glucan  -0.186    24    24 negligible
# Create a box-plot
bxp <- ggboxplot(
  my_data, x = "group", y = "score", 
  ylab = "score", xlab = "Groups", add = "jitter"
)

# Add p-value and significance levels
stat.test <- stat.test %>% add_xy_position(x = "group")
bxp + 
  stat_pvalue_manual(stat.test, tip.length = 0) +
  labs(subtitle = get_test_label(stat.test, detailed = TRUE))
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
## Warning: Removed 1 rows containing missing values (geom_point).

10.2. Flow cytometry - two-way ANOVA

# Rotate input file from folder 'input_data_flow_cytometry' to perform the analysis for other immunological parameters:
my_data <- read.table("./input_data_flow_cytometry/Flow_cytometry_DC_MLN_CDC1_CD8086_BG.csv",
                      header = TRUE,
                      sep = ",",
                      stringsAsFactors = FALSE)

my_data$id <- as.factor(my_data$id)

my_data$group <- as.factor(my_data$group)

my_data$time <- as.factor(my_data$time)

# Inspect some random rows of the data by groups
set.seed(123)
my_data %>% sample_n_by(group, time, size = 1)
## # A tibble: 6 x 4
##   id    group    time  score
##   <fct> <fct>    <fct> <dbl>
## 1 7     Control  t1     69.4
## 2 7     Control  t2    305  
## 3 3     Control  t3    157  
## 4 6     Y_glucan t1     25.7
## 5 3     Y_glucan t2    179  
## 6 2     Y_glucan t3    186
#Extreme outliers are removed from the analysis. 

my_data %>%
  group_by(time) %>%
  identify_outliers(score)
## # A tibble: 1 x 6
##   time  id    group   score is.outlier is.extreme
##   <fct> <fct> <fct>   <dbl> <lgl>      <lgl>     
## 1 t3    1     Control   274 TRUE       FALSE
#Jitter

my_data$group <- factor(my_data$group, levels = c("Control", "Y_glucan", "Control_med", "Y_glucan_med"))

my_data %>%
  filter(group %in% c("Control", "Y_glucan", "Control_med", "Y_glucan_med" )) %>%
  ggplot(aes(x=time, y=score, fill = factor(group))) +
  geom_boxplot() + 
  labs(fill="group") +
  scale_fill_manual(values = c("#E69F00", "#669966", "#CCCCCC", "#999999")) +
  geom_point(position=position_jitterdodge(),alpha=0.5, size = 5)
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
## Warning: Removed 1 rows containing missing values (geom_point).

# Two-way ANOVA
# http://rstudio-pubs-static.s3.amazonaws.com/476761_eb23c47a75d6447688cb793ac8a3e461.html

res.aov2 <- aov(score ~ group + time, data = my_data)
summary(res.aov2)
##             Df Sum Sq Mean Sq F value   Pr(>F)    
## group        1   2177    2177   0.872    0.356    
## time         2 133227   66613  26.674 2.93e-08 ***
## Residuals   43 107385    2497                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 1 observation deleted due to missingness
# Two-way ANOVA with interaction effect
# These two calls are equivalent
res.aov3 <- aov(score ~ group * time, data = my_data)
res.aov3 <- aov(score ~ group + time + group:time, data = my_data)
summary(res.aov3)
##             Df Sum Sq Mean Sq F value   Pr(>F)    
## group        1   2177    2177   0.853    0.361    
## time         2 133227   66613  26.109 4.87e-08 ***
## group:time   2   2779    1389   0.545    0.584    
## Residuals   41 104606    2551                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 1 observation deleted due to missingness
require("dplyr")
group_by(my_data, group, time) %>%
  summarise(
    count = n(),
    mean = mean(score, na.rm = TRUE),
    sd = sd(score, na.rm = TRUE)
  )
## `summarise()` regrouping output by 'group' (override with `.groups` argument)
## # A tibble: 6 x 5
## # Groups:   group [2]
##   group    time  count  mean    sd
##   <fct>    <fct> <int> <dbl> <dbl>
## 1 Control  t1        8  63.0  21.6
## 2 Control  t2        8 168.   75.2
## 3 Control  t3        8 166    51.9
## 4 Y_glucan t1        8  60.4  24.1
## 5 Y_glucan t2        8 200.   63.4
## 6 Y_glucan t3        8 167.   40.0
model.tables(res.aov2, type="means", se = TRUE)
## Design is unbalanced - use se.contrast() for se's
## Tables of means
## Grand mean
##          
## 139.0702 
## 
##  group 
##     Control Y_glucan
##       132.4      146
## rep    24.0       23
## 
##  time 
##        t1    t2    t3
##     62.06 183.9 166.4
## rep 15.00  16.0  16.0
TukeyHSD(res.aov2, which = "time")
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = score ~ group + time, data = my_data)
## 
## $time
##           diff       lwr       upr     p adj
## t2-t1 121.8554  78.25793 165.45284 0.0000001
## t3-t1 104.3554  60.75793 147.95284 0.0000020
## t3-t2 -17.5000 -60.38851  25.38851 0.5867905
# 1. Homogeneity of variances
plot(res.aov3, 1)

library(car)
leveneTest(score ~ group*time, data = my_data)
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  5  0.8258 0.5387
##       41
# 2. Normality
plot(res.aov3, 2)

# 3. Skewness

#Skewness between -2 and +2 acceptable (George and Malley, 2010). 
library(moments)
skewness(my_data$score, na.rm = TRUE)
## [1] 0.641101
# Extract the residuals
aov_residuals <- residuals(object = res.aov3)
# Run Shapiro-Wilk test
shapiro.test(x = aov_residuals)
## 
##  Shapiro-Wilk normality test
## 
## data:  aov_residuals
## W = 0.88361, p-value = 0.000225

10.3. Restimulation assay - t-test

# Rotate input file from folder 'input_data_restimulation_assay' to perform the analysis for other immunological parameters:
my_data <- read.table("./input_data_restimulation_assay/Restimulation_Assay_MLN_IL10_CONA5.csv",
                      header = TRUE,
                      sep = ",",
                      stringsAsFactors = FALSE)

my_data$id <- as.factor(my_data$id)

my_data$group <- as.factor(my_data$group)

my_data$time <- as.factor(my_data$time)

# Load the data
data("genderweight", package = "datarium")

# Show a sample of the data by group
set.seed(123)
my_data %>% sample_n_by(group, size = 2)
## # A tibble: 4 x 4
##   id    group    time  score
##   <fct> <fct>    <fct> <dbl>
## 1 7     Control  t2    2852.
## 2 3     Control  t3     719.
## 3 6     Y_glucan t2    1293.
## 4 3     Y_glucan t1     692.
my_data %>%
  group_by(group) %>%
  get_summary_stats(score, type = "mean_sd")
## # A tibble: 2 x 5
##   group    variable     n  mean    sd
##   <fct>    <chr>    <dbl> <dbl> <dbl>
## 1 Control  score       23 1099.  870.
## 2 Y_glucan score       23 1267.  881.
# Save the data in two different vector
Y_glucan <- my_data %>%
  filter(group == "Y_glucan") %>%
  pull(score)
Control <- my_data %>%
  filter(group == "Control") %>%
  pull(score)

# Compute t-test
res <- t.test(Y_glucan, Control)
res
## 
##  Welch Two Sample t-test
## 
## data:  Y_glucan and Control
## t = 0.65064, df = 43.993, p-value = 0.5187
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -352.4139  688.4389
## sample estimates:
## mean of x mean of y 
##  1267.478  1099.465
# Levene
levene_test(score ~ group, data = my_data)
## # A tibble: 1 x 4
##     df1   df2 statistic     p
##   <int> <int>     <dbl> <dbl>
## 1     1    44    0.0207 0.886
# Compute t-test
res <- t.test(score ~ group, data = my_data)
res
## 
##  Welch Two Sample t-test
## 
## data:  score by group
## t = -0.65064, df = 43.993, p-value = 0.5187
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -688.4389  352.4139
## sample estimates:
##  mean in group Control mean in group Y_glucan 
##               1099.465               1267.478
stat.test <- my_data %>% 
  t_test(score ~ group) %>%
  add_significance()
stat.test
## # A tibble: 1 x 9
##   .y.   group1  group2      n1    n2 statistic    df     p p.signif
##   <chr> <chr>   <chr>    <int> <int>     <dbl> <dbl> <dbl> <chr>   
## 1 score Control Y_glucan    24    24    -0.651  44.0 0.519 ns
my_data %>%
  t_test(score ~ group, detailed = TRUE) %>%
  add_significance()
## # A tibble: 1 x 16
##   estimate estimate1 estimate2 .y.   group1 group2    n1    n2 statistic     p
##      <dbl>     <dbl>     <dbl> <chr> <chr>  <chr>  <int> <int>     <dbl> <dbl>
## 1    -168.     1099.     1267. score Contr~ Y_glu~    24    24    -0.651 0.519
## # ... with 6 more variables: df <dbl>, conf.low <dbl>, conf.high <dbl>,
## #   method <chr>, alternative <chr>, p.signif <chr>
my_data %>% cohens_d(score ~ group, var.equal = FALSE)
## # A tibble: 1 x 7
##   .y.   group1  group2   effsize    n1    n2 magnitude 
## * <chr> <chr>   <chr>      <dbl> <int> <int> <ord>     
## 1 score Control Y_glucan  -0.192    24    24 negligible
# Create a box-plot
bxp <- ggboxplot(
  my_data, x = "group", y = "score", 
  ylab = "score", xlab = "Groups", add = "jitter"
)

# Add p-value and significance levels
stat.test <- stat.test %>% add_xy_position(x = "group")
bxp + 
  stat_pvalue_manual(stat.test, tip.length = 0) +
  labs(subtitle = get_test_label(stat.test, detailed = TRUE))
## Warning: Removed 2 rows containing non-finite values (stat_boxplot).
## Warning: Removed 2 rows containing missing values (geom_point).

10.4. Rest. assay - two-way ANOVA

Restimulation assay - two-way ANOVA

# Rotate input file from folder 'input_data_restimulation_assay' to perform the analysis for other immunological parameters:
my_data <- read.table("./input_data_restimulation_assay/Restimulation_Assay_MLN_IL10_CONA5.csv",
                      header = TRUE,
                      sep = ",",
                      stringsAsFactors = FALSE)

my_data$id <- as.factor(my_data$id)

my_data$group <- as.factor(my_data$group)

my_data$time <- as.factor(my_data$time)

# Inspect some random rows of the data by groups
set.seed(123)
my_data %>% sample_n_by(group, time, size = 1)
## # A tibble: 6 x 4
##   id    group    time  score
##   <fct> <fct>    <fct> <dbl>
## 1 7     Control  t1     991.
## 2 7     Control  t2    2852.
## 3 3     Control  t3     719.
## 4 6     Y_glucan t1    1289.
## 5 3     Y_glucan t2    1955.
## 6 2     Y_glucan t3     381.
#Extreme outliers are removed from the analysis. 

my_data %>%
  group_by(time) %>%
  identify_outliers(score)
## # A tibble: 2 x 6
##   time  id    group    score is.outlier is.extreme
##   <fct> <fct> <fct>    <dbl> <lgl>      <lgl>     
## 1 t1    1     Y_glucan 2231. TRUE       FALSE     
## 2 t3    1     Y_glucan 1545. TRUE       FALSE
#Jitter

my_data$group <- factor(my_data$group, levels = c("Control", "Y_glucan", "Control_med", "Y_glucan_med"))

my_data %>%
  filter(group %in% c("Control", "Y_glucan", "Control_med", "Y_glucan_med" )) %>%
  ggplot(aes(x=time, y=score, fill = factor(group))) +
  geom_boxplot() + 
  labs(fill="group") +
  scale_fill_manual(values = c("#E69F00", "#669966", "#CCCCCC", "#999999")) +
  geom_point(position=position_jitterdodge(),alpha=0.5, size = 5)
## Warning: Removed 2 rows containing non-finite values (stat_boxplot).
## Warning: Removed 2 rows containing missing values (geom_point).

# Two-way ANOVA
# http://rstudio-pubs-static.s3.amazonaws.com/476761_eb23c47a75d6447688cb793ac8a3e461.html

res.aov2 <- aov(score ~ group + time, data = my_data)
summary(res.aov2)
##             Df   Sum Sq Mean Sq F value   Pr(>F)    
## group        1   324624  324624   0.701    0.407    
## time         2 14285532 7142766  15.420 9.52e-06 ***
## Residuals   42 19455387  463223                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 2 observations deleted due to missingness
# Two-way ANOVA with interaction effect
# These two calls are equivalent
res.aov3 <- aov(score ~ group * time, data = my_data)
res.aov3 <- aov(score ~ group + time + group:time, data = my_data)
summary(res.aov3)
##             Df   Sum Sq Mean Sq F value   Pr(>F)    
## group        1   324624  324624   0.686    0.412    
## time         2 14285532 7142766  15.095 1.31e-05 ***
## group:time   2   527873  263936   0.558    0.577    
## Residuals   40 18927514  473188                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 2 observations deleted due to missingness
require("dplyr")
group_by(my_data, group, time) %>%
  summarise(
    count = n(),
    mean = mean(score, na.rm = TRUE),
    sd = sd(score, na.rm = TRUE)
  )
## `summarise()` regrouping output by 'group' (override with `.groups` argument)
## # A tibble: 6 x 5
## # Groups:   group [2]
##   group    time  count  mean    sd
##   <fct>    <fct> <int> <dbl> <dbl>
## 1 Control  t1        8  679.  438.
## 2 Control  t2        8 1928.  958.
## 3 Control  t3        8  633.  279.
## 4 Y_glucan t1        8 1166.  590.
## 5 Y_glucan t2        8 1939. 1034.
## 6 Y_glucan t3        8  685.  401.
model.tables(res.aov2, type="means", se = TRUE)
## Design is unbalanced - use se.contrast() for se's
## Tables of means
## Grand mean
##          
## 1183.471 
## 
##  group 
##     Control Y_glucan
##        1099     1267
## rep      23       23
## 
##  time 
##      t1   t2    t3
##     912 1933 655.2
## rep  15   16  15.0
TukeyHSD(res.aov2, which = "time")
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = score ~ group + time, data = my_data)
## 
## $time
##             diff        lwr       upr     p adj
## t2-t1  1021.1610   426.8876 1615.4345 0.0004243
## t3-t1  -256.8019  -860.5843  346.9805 0.5603017
## t3-t2 -1277.9629 -1872.2364 -683.6895 0.0000151
# 1. Homogeneity of variances
plot(res.aov3, 1)

library(car)
leveneTest(score ~ group*time, data = my_data)
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value  Pr(>F)  
## group  5  2.7583 0.03119 *
##       40                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# 2. Normality
plot(res.aov3, 2)

# 3. Skewness

#Skewness between -2 and +2 acceptable (George and Malley, 2010). 
library(moments)
skewness(my_data$score, na.rm = TRUE)
## [1] 1.011202
# Extract the residuals
aov_residuals <- residuals(object = res.aov3)
# Run Shapiro-Wilk test
shapiro.test(x = aov_residuals)
## 
##  Shapiro-Wilk normality test
## 
## data:  aov_residuals
## W = 0.96949, p-value = 0.2653

10.5. Vaccination response t-test

# Rotate input file from folder 'input_data_vaccination_response' to perform the analysis for other immunological parameters:
my_data <- read.table("./input_data_vaccination_response/Vaccination_Salmonella_IgA_BG.csv",
                      header = TRUE,
                      sep = ",",
                      stringsAsFactors = FALSE)

my_data$id <- as.factor(my_data$id)

my_data$group <- as.factor(my_data$group)

my_data$time <- as.factor(my_data$time)

# Load the data
data("genderweight", package = "datarium")

# Show a sample of the data by group
set.seed(123)
my_data %>% sample_n_by(group, size = 2)
## # A tibble: 4 x 4
##   id    group    time   score
##   <fct> <fct>    <fct>  <dbl>
## 1 15    Control  t2    0.226 
## 2 15    Control  t1    0.0893
## 3 14    Y_Glucan t1    0.0675
## 4 3     Y_Glucan t1    0.144
my_data %>%
  group_by(group) %>%
  get_summary_stats(score, type = "mean_sd")
## # A tibble: 2 x 5
##   group    variable     n  mean    sd
##   <fct>    <chr>    <dbl> <dbl> <dbl>
## 1 Control  score       47 0.391 0.344
## 2 Y_Glucan score       45 0.342 0.249
# Save the data in two different vector
Y_glucan <- my_data %>%
  filter(group == "Y_Glucan") %>%
  pull(score)
Control <- my_data %>%
  filter(group == "Control") %>%
  pull(score)

# Compute t-test
res <- t.test(Y_glucan, Control)
res
## 
##  Welch Two Sample t-test
## 
## data:  Y_glucan and Control
## t = -0.78629, df = 83.872, p-value = 0.4339
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.17303530  0.07497459
## sample estimates:
## mean of x mean of y 
## 0.3420867 0.3911170
# Levene
levene_test(score ~ group, data = my_data)
## # A tibble: 1 x 4
##     df1   df2 statistic     p
##   <int> <int>     <dbl> <dbl>
## 1     1    90      2.61 0.110
# Compute t-test
res <- t.test(score ~ group, data = my_data)
res
## 
##  Welch Two Sample t-test
## 
## data:  score by group
## t = 0.78629, df = 83.872, p-value = 0.4339
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.07497459  0.17303530
## sample estimates:
##  mean in group Control mean in group Y_Glucan 
##              0.3911170              0.3420867
stat.test <- my_data %>% 
  t_test(score ~ group) %>%
  add_significance()
stat.test
## # A tibble: 1 x 9
##   .y.   group1  group2      n1    n2 statistic    df     p p.signif
##   <chr> <chr>   <chr>    <int> <int>     <dbl> <dbl> <dbl> <chr>   
## 1 score Control Y_Glucan    48    48     0.786  83.9 0.434 ns
my_data %>%
  t_test(score ~ group, detailed = TRUE) %>%
  add_significance()
## # A tibble: 1 x 16
##   estimate estimate1 estimate2 .y.   group1 group2    n1    n2 statistic     p
##      <dbl>     <dbl>     <dbl> <chr> <chr>  <chr>  <int> <int>     <dbl> <dbl>
## 1   0.0490     0.391     0.342 score Contr~ Y_Glu~    48    48     0.786 0.434
## # ... with 6 more variables: df <dbl>, conf.low <dbl>, conf.high <dbl>,
## #   method <chr>, alternative <chr>, p.signif <chr>
my_data %>% cohens_d(score ~ group, var.equal = FALSE)
## # A tibble: 1 x 7
##   .y.   group1  group2   effsize    n1    n2 magnitude 
## * <chr> <chr>   <chr>      <dbl> <int> <int> <ord>     
## 1 score Control Y_Glucan   0.163    48    48 negligible
# Create a box-plot
bxp <- ggboxplot(
  my_data, x = "group", y = "score", 
  ylab = "score", xlab = "Groups", add = "jitter"
)

# Add p-value and significance levels
stat.test <- stat.test %>% add_xy_position(x = "group")
bxp + 
  stat_pvalue_manual(stat.test, tip.length = 0) +
  labs(subtitle = get_test_label(stat.test, detailed = TRUE))
## Warning: Removed 4 rows containing non-finite values (stat_boxplot).
## Warning: Removed 4 rows containing missing values (geom_point).

10.6. Vaccination resp. - ANOVA

Vaccination response - two-way ANOVA

# Rotate input file from folder 'input_data_vaccination_response' to perform the analysis for other immunological parameters:
my_data <- read.table("./input_data_vaccination_response/Vaccination_Salmonella_IgA_BG.csv",
                      header = TRUE,
                      sep = ",",
                      stringsAsFactors = FALSE)

my_data$id <- as.factor(my_data$id)

my_data$group <- as.factor(my_data$group)

my_data$time <- as.factor(my_data$time)

# Inspect some random rows of the data by groups
set.seed(123)
my_data %>% sample_n_by(group, time, size = 1)
## # A tibble: 6 x 4
##   id    group    time   score
##   <fct> <fct>    <fct>  <dbl>
## 1 15    Control  t1    0.0893
## 2 15    Control  t2    0.226 
## 3 3     Control  t3    1.01  
## 4 14    Y_Glucan t1    0.0675
## 5 3     Y_Glucan t2    0.222 
## 6 10    Y_Glucan t3    0.509
#Extreme outliers are removed from the analysis. 

my_data %>%
  group_by(time) %>%
  identify_outliers(score)
## # A tibble: 4 x 6
##   time  id    group    score is.outlier is.extreme
##   <fct> <fct> <fct>    <dbl> <lgl>      <lgl>     
## 1 t1    4     Y_Glucan 0.238 TRUE       FALSE     
## 2 t1    5     Y_Glucan 0.304 TRUE       FALSE     
## 3 t1    11    Y_Glucan 0.256 TRUE       FALSE     
## 4 t2    16    Control  1.18  TRUE       FALSE
#Jitter

my_data$group <- factor(my_data$group, levels = c("Control", "Y_Glucan", "Control_med", "Y_glucan_med"))

my_data %>%
  filter(group %in% c("Control", "Y_glucan", "Control_med", "Y_glucan_med" )) %>%
  ggplot(aes(x=time, y=score, fill = factor(group))) +
  geom_boxplot() + 
  labs(fill="group") +
  scale_fill_manual(values = c("#E69F00", "#669966", "#CCCCCC", "#999999")) +
  geom_point(position=position_jitterdodge(),alpha=0.5, size = 5)
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
## Warning: Removed 1 rows containing missing values (geom_point).

# Two-way ANOVA
# http://rstudio-pubs-static.s3.amazonaws.com/476761_eb23c47a75d6447688cb793ac8a3e461.html

res.aov2 <- aov(score ~ group + time, data = my_data)
summary(res.aov2)
##             Df Sum Sq Mean Sq F value   Pr(>F)    
## group        1  0.055  0.0553   1.032    0.312    
## time         2  3.444  1.7219  32.156 3.29e-11 ***
## Residuals   88  4.712  0.0535                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 4 observations deleted due to missingness
# Two-way ANOVA with interaction effect
# These two calls are equivalent
res.aov3 <- aov(score ~ group * time, data = my_data)
res.aov3 <- aov(score ~ group + time + group:time, data = my_data)
summary(res.aov3)
##             Df Sum Sq Mean Sq F value   Pr(>F)    
## group        1  0.055  0.0553   1.039    0.311    
## time         2  3.444  1.7219  32.384 3.28e-11 ***
## group:time   2  0.140  0.0698   1.313    0.274    
## Residuals   86  4.573  0.0532                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 4 observations deleted due to missingness
require("dplyr")
group_by(my_data, group, time) %>%
  summarise(
    count = n(),
    mean = mean(score, na.rm = TRUE),
    sd = sd(score, na.rm = TRUE)
  )
## `summarise()` regrouping output by 'group' (override with `.groups` argument)
## # A tibble: 6 x 5
## # Groups:   group [2]
##   group    time  count   mean     sd
##   <fct>    <fct> <int>  <dbl>  <dbl>
## 1 Control  t1       16 0.0955 0.0225
## 2 Control  t2       16 0.397  0.279 
## 3 Control  t3       16 0.662  0.343 
## 4 Y_Glucan t1       16 0.138  0.0816
## 5 Y_Glucan t2       16 0.372  0.189 
## 6 Y_Glucan t3       16 0.516  0.272
model.tables(res.aov2, type="means", se = TRUE)
## Design is unbalanced - use se.contrast() for se's
## Tables of means
## Grand mean
##           
## 0.3671348 
## 
##  group 
##     Control Y_Glucan
##      0.3911   0.3421
## rep 47.0000  45.0000
## 
##  time 
##          t1      t2      t3
##      0.1173  0.3845  0.5915
## rep 30.0000 31.0000 31.0000
TukeyHSD(res.aov2, which = "time")
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = score ~ group + time, data = my_data)
## 
## $time
##            diff        lwr       upr     p adj
## t2-t1 0.2672593 0.12597045 0.4085482 0.0000589
## t3-t1 0.4742513 0.33296238 0.6155402 0.0000000
## t3-t2 0.2069919 0.06686593 0.3471179 0.0019546
# 1. Homogeneity of variances
plot(res.aov3, 1)

library(car)
leveneTest(score ~ group*time, data = my_data)
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value    Pr(>F)    
## group  5  6.1382 6.558e-05 ***
##       86                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# 2. Normality
plot(res.aov3, 2)

# 3. Skewness

#Skewness between -2 and +2 acceptable (George and Malley, 2010). 
library(moments)
skewness(my_data$score, na.rm = TRUE)
## [1] 1.179768
# Extract the residuals
aov_residuals <- residuals(object = res.aov3)
# Run Shapiro-Wilk test
shapiro.test(x = aov_residuals)
## 
##  Shapiro-Wilk normality test
## 
## data:  aov_residuals
## W = 0.94218, p-value = 0.0004861

11. Correlation plot

11.1. Prepare data

ile <- readRDS("./phyobjects/ps1.decontam.rds")
ile <- subset_samples(ile, Day_of_study_factor == "Day_27")
ile <- subset_samples(ile, Origin == "ileum_digesta")
ile <- microbiome::aggregate_taxa(ile, "Genus")
ile.r <- microbiome::transform(ile, "compositional")
#Prevalence filter:
ile.r <- filter_taxa(ile.r, function(x) sum(x > 0.001) > (0.50*length(x)), TRUE)

11.2. Extract data from phyloseq object

OTU1 = as(otu_table(ile.r), "matrix")
# transpose if necessary
if(taxa_are_rows(ile.r)){OTU1 <- t(OTU1)}
# Coerce to data.frame
OTUdf = as.data.frame(OTU1)
write.csv(OTUdf, file = "./output_data/OTUdf_all_treatments_day27_ileum_genera_filter_sum0.001_prev0.5.csv") #this file was used to manually combine genera abundances with day 27 ileum sample data

11.3. Import ileum data file

my_data <- read.table("./input_data/ileum_metadata_plus_genera_abundances_for_correlation_figure.csv",
                      header = TRUE,
                      sep = ",",
                      stringsAsFactors = FALSE)
my_data <- my_data
head(my_data, 6)
##   Weight_birth Weight_day_14 Weight_at_day_26 pH_jejunum pH_ileum pH_caecum
## 1         1.28          4.38              7.9       6.06     7.11      6.18
## 2         1.68          5.90             10.0       7.10     8.40      6.32
## 3         2.13          4.67              7.8       7.05     8.03      6.08
## 4         1.85          5.73               NA       6.84     8.14      6.43
## 5         1.23          4.25              7.8         NA       NA      6.10
## 6         2.07          6.12              9.6       7.66     8.71      6.52
##   pH_colon CTL_perc_MLN_d27 CTL_ki67_MLN_d27 MemoryT_perc_MLN_d27
## 1     6.51             21.3             26.4                 5.81
## 2     6.95             19.5             21.4                 7.30
## 3     7.31             15.7             20.2                 5.77
## 4     5.94             17.9             28.1                 2.40
## 5     6.86             17.9             29.2                 4.32
## 6     5.79             24.6             29.3                 2.91
##   MemoryT_perc_CD4highCD8low_MLN_d27 MemoryT_Ki67_CD4highCD8low_MLN_d27
## 1                               43.1                               45.5
## 2                               73.4                               28.8
## 3                               54.5                               37.0
## 4                               76.0                               51.9
## 5                               52.0                               35.4
## 6                               55.2                               39.6
##   MemoryT_perc_CD4lowCD8high_MLN_d27 MemoryT_Ki67_CD4lowCD8high_MLN_d27
## 1                               55.1                               36.8
## 2                               24.5                               24.8
## 3                               43.5                               24.0
## 4                               21.5                               28.3
## 5                               43.3                               49.8
## 6                               41.1                               41.9
##   HelperT_perc_MLN_d27 HelperT_Ki67_MLN_d27 Tregs_perc_MLN_d27
## 1                 68.1                 18.3               7.26
## 2                 65.3                 22.1              10.80
## 3                 72.3                 13.8               7.82
## 4                 73.9                 13.6               5.74
## 5                 69.7                 13.6               5.88
## 6                 66.1                 23.4              10.10
##   Gammadelta_perc_MLN_d27 Gammadelta_Ki67_MLN_d27 cdc1_perc_MLN_d27
## 1                    1.13                    25.4             0.410
## 2                    0.81                    17.8             0.170
## 3                    0.74                    16.1             0.085
## 4                    1.51                    23.5             0.860
## 5                    1.06                    17.3             0.300
## 6                    1.33                    25.2             0.150
##   cdc1_8086_MLN_d27 cdc2_perc_MLN_d27 cdc2_8086_MLN_d27 pdc_perc_MLN_d27
## 1              62.9              0.56               205             2.77
## 2              78.4              0.27               187             2.33
## 3              75.8              0.21               202            10.80
## 4              57.8              1.59               116             4.83
## 5             103.0              0.11               174             6.09
## 6              80.9              0.25               209             2.53
##   pdc_8086_MLN_d27 mln_restimulation_il10_ConA_5_d27
## 1            127.0                         2230.7110
## 2             65.5                          777.3465
## 3            160.0                          458.0275
## 4            142.0                          189.4585
## 5            144.0                           84.3885
## 6            130.0                          691.9395
##   mln_restimulation_il10_lps_10_d27 mln_restimulation_il10_medium_d27
## 1                           21.7105                           5.21850
## 2                           13.7935                           2.31525
## 3                            5.7960                           0.19950
## 4                            9.9365                           2.27500
## 5                           12.3235                           2.12975
## 6                           30.1875                           9.79300
##   mln_restimulation_TNF_ConA_5_d27 mln_restimulation_TNF_LPS_10_d27
## 1                          76.5730                           8.9740
## 2                          17.4125                           4.2630
## 3                          20.4855                           3.6855
## 4                          18.8195                           2.3170
## 5                           7.7980                           0.9835
## 6                          21.8295                           3.8605
##   mln_restimulation_TNF_medium_d27 Lactobacillus Streptococcus
## 1                          7.59850     0.4396062   0.004990015
## 2                          2.45000     0.5616382   0.073144223
## 3                          1.91975     0.4489857   0.105183643
## 4                          1.48400     0.7591559   0.020607325
## 5                          1.34400     0.2453164   0.065894422
## 6                          3.44050     0.5209840   0.039087397
##   Clostridium_sensu_stricto_1 Romboutsia Terrisporobacter Turicibacter
## 1                  0.08947352  0.1630176       0.02471804   0.25448293
## 2                  0.04590598  0.1290964       0.07606056   0.07956653
## 3                  0.06719298  0.1237184       0.08576225   0.05941798
## 4                  0.02507603  0.1233235       0.05152122   0.01617940
## 5                  0.25871272  0.2266204       0.08034537   0.06640560
## 6                  0.11026820  0.1811106       0.03124736   0.08165567
##   Veillonella Actinobacillus Haemophilus
## 1 0.000000000    0.000000000 0.013366670
## 2 0.008956175    0.001144223 0.007180876
## 3 0.005020589    0.015246009 0.037062541
## 4 0.000000000    0.001846910 0.000000000
## 5 0.007271007    0.010246979 0.026628045
## 6 0.001180438    0.000000000 0.007578169
# Summary of data
summary(my_data)
##   Weight_birth   Weight_day_14   Weight_at_day_26   pH_jejunum   
##  Min.   :1.040   Min.   :2.790   Min.   : 5.030   Min.   :6.060  
##  1st Qu.:1.235   1st Qu.:4.290   1st Qu.: 7.800   1st Qu.:6.997  
##  Median :1.650   Median :5.000   Median : 8.575   Median :7.315  
##  Mean   :1.548   Mean   :4.839   Mean   : 8.381   Mean   :7.131  
##  3rd Qu.:1.725   3rd Qu.:5.510   3rd Qu.: 9.400   3rd Qu.:7.407  
##  Max.   :2.130   Max.   :6.120   Max.   :10.000   Max.   :7.660  
##                                  NA's   :1        NA's   :3      
##     pH_ileum       pH_caecum        pH_colon     CTL_perc_MLN_d27
##  Min.   :7.070   Min.   :6.080   Min.   :5.790   Min.   :13.40   
##  1st Qu.:7.300   1st Qu.:6.175   1st Qu.:6.470   1st Qu.:17.90   
##  Median :7.640   Median :6.390   Median :6.730   Median :19.60   
##  Mean   :7.763   Mean   :6.402   Mean   :6.694   Mean   :20.04   
##  3rd Qu.:8.140   3rd Qu.:6.555   3rd Qu.:6.915   3rd Qu.:22.85   
##  Max.   :8.710   Max.   :7.010   Max.   :7.730   Max.   :26.40   
##  NA's   :2                                       NA's   :1       
##  CTL_ki67_MLN_d27 MemoryT_perc_MLN_d27 MemoryT_perc_CD4highCD8low_MLN_d27
##  Min.   :16.10    Min.   :1.070        Min.   :26.20                     
##  1st Qu.:24.02    1st Qu.:2.265        1st Qu.:52.40                     
##  Median :27.25    Median :2.910        Median :57.00                     
##  Mean   :27.68    Mean   :3.482        Mean   :60.04                     
##  3rd Qu.:29.27    3rd Qu.:4.553        3rd Qu.:73.55                     
##  Max.   :45.20    Max.   :7.300        Max.   :90.50                     
##  NA's   :1        NA's   :1            NA's   :1                         
##  MemoryT_Ki67_CD4highCD8low_MLN_d27 MemoryT_perc_CD4lowCD8high_MLN_d27
##  Min.   :28.80                      Min.   : 9.52                     
##  1st Qu.:37.00                      1st Qu.:23.98                     
##  Median :40.85                      Median :39.40                     
##  Mean   :44.58                      Mean   :37.49                     
##  3rd Qu.:48.88                      3rd Qu.:43.45                     
##  Max.   :69.80                      Max.   :71.30                     
##  NA's   :1                          NA's   :1                         
##  MemoryT_Ki67_CD4lowCD8high_MLN_d27 HelperT_perc_MLN_d27 HelperT_Ki67_MLN_d27
##  Min.   :23.80                      Min.   :60.90        Min.   :13.60       
##  1st Qu.:30.27                      1st Qu.:65.50        1st Qu.:15.28       
##  Median :42.85                      Median :68.20        Median :18.80       
##  Mean   :40.00                      Mean   :68.32        Mean   :20.79       
##  3rd Qu.:49.20                      3rd Qu.:71.65        3rd Qu.:23.23       
##  Max.   :55.80                      Max.   :73.90        Max.   :33.20       
##  NA's   :1                          NA's   :1            NA's   :1           
##  Tregs_perc_MLN_d27 Gammadelta_perc_MLN_d27 Gammadelta_Ki67_MLN_d27
##  Min.   : 5.380     Min.   :0.740           Min.   :12.10          
##  1st Qu.: 6.405     1st Qu.:1.175           1st Qu.:17.95          
##  Median : 8.030     Median :1.425           Median :22.75          
##  Mean   : 8.475     Mean   :1.678           Mean   :23.34          
##  3rd Qu.: 9.805     3rd Qu.:1.952           3rd Qu.:25.35          
##  Max.   :14.500     Max.   :4.140           Max.   :44.80          
##  NA's   :1          NA's   :1               NA's   :1              
##  cdc1_perc_MLN_d27 cdc1_8086_MLN_d27 cdc2_perc_MLN_d27 cdc2_8086_MLN_d27
##  Min.   :0.085     Min.   : 25.7     Min.   :0.110     Min.   :116.0    
##  1st Qu.:0.210     1st Qu.: 38.5     1st Qu.:0.285     1st Qu.:166.0    
##  Median :0.350     Median : 62.9     Median :0.520     Median :187.0    
##  Mean   :0.371     Mean   : 58.5     Mean   :0.546     Mean   :181.8    
##  3rd Qu.:0.435     3rd Qu.: 72.6     3rd Qu.:0.665     3rd Qu.:200.5    
##  Max.   :0.860     Max.   :103.0     Max.   :1.590     Max.   :229.0    
##                                                                         
##  pdc_perc_MLN_d27 pdc_8086_MLN_d27 mln_restimulation_il10_ConA_5_d27
##  Min.   : 1.490   Min.   : 65.5    Min.   :  84.39                  
##  1st Qu.: 2.650   1st Qu.:128.5    1st Qu.: 556.84                  
##  Median : 4.620   Median :138.0    Median : 838.80                  
##  Mean   : 4.645   Mean   :140.6    Mean   : 854.96                  
##  3rd Qu.: 5.825   3rd Qu.:143.0    3rd Qu.: 973.74                  
##  Max.   :10.800   Max.   :215.0    Max.   :2230.71                  
##                                    NA's   :1                        
##  mln_restimulation_il10_lps_10_d27 mln_restimulation_il10_medium_d27
##  Min.   : 0.000                    Min.   : 0.01925                 
##  1st Qu.: 6.465                    1st Qu.: 2.28506                 
##  Median :13.059                    Median : 3.47637                 
##  Mean   :12.312                    Mean   : 5.66500                 
##  3rd Qu.:16.659                    3rd Qu.: 8.89394                 
##  Max.   :30.188                    Max.   :17.21475                 
##  NA's   :1                         NA's   :1                        
##  mln_restimulation_TNF_ConA_5_d27 mln_restimulation_TNF_LPS_10_d27
##  Min.   :  4.239                  Min.   : 0.9835                 
##  1st Qu.: 18.116                  1st Qu.: 3.5332                 
##  Median : 21.829                  Median : 5.6840                 
##  Mean   : 43.060                  Mean   : 6.2064                 
##  3rd Qu.: 53.100                  3rd Qu.: 7.6212                 
##  Max.   :174.409                  Max.   :18.9140                 
##                                                                   
##  mln_restimulation_TNF_medium_d27 Lactobacillus    Streptococcus    
##  Min.   : 1.344                   Min.   :0.1383   Min.   :0.00225  
##  1st Qu.: 2.185                   1st Qu.:0.4170   1st Qu.:0.01484  
##  Median : 4.106                   Median :0.5616   Median :0.02447  
##  Mean   : 4.936                   Mean   :0.5801   Mean   :0.05344  
##  3rd Qu.: 6.857                   3rd Qu.:0.7991   3rd Qu.:0.06952  
##  Max.   :13.106                   Max.   :0.9066   Max.   :0.21976  
##                                                                     
##  Clostridium_sensu_stricto_1   Romboutsia       Terrisporobacter  
##  Min.   :0.00000             Min.   :0.005936   Min.   :0.007063  
##  1st Qu.:0.02921             1st Qu.:0.033550   1st Qu.:0.027560  
##  Median :0.06719             Median :0.109685   Median :0.036324  
##  Mean   :0.10622             Mean   :0.098730   Mean   :0.060560  
##  3rd Qu.:0.18144             3rd Qu.:0.126407   3rd Qu.:0.083054  
##  Max.   :0.28562             Max.   :0.226620   Max.   :0.183378  
##                                                                   
##   Turicibacter      Veillonella       Actinobacillus      Haemophilus      
##  Min.   :0.00000   Min.   :0.000000   Min.   :0.000000   Min.   :0.000000  
##  1st Qu.:0.01423   1st Qu.:0.000000   1st Qu.:0.000000   1st Qu.:0.005349  
##  Median :0.05942   Median :0.001180   Median :0.001127   Median :0.007578  
##  Mean   :0.06570   Mean   :0.006790   Mean   :0.002613   Mean   :0.013364  
##  3rd Qu.:0.08061   3rd Qu.:0.006146   3rd Qu.:0.001989   3rd Qu.:0.012141  
##  Max.   :0.25448   Max.   :0.052143   Max.   :0.015246   Max.   :0.057469  
## 
# Turn into numeric value
my_data$cdc2_8086_MLN_d27<- as.numeric(my_data$cdc2_8086_MLN_d27)

str(my_data)
## 'data.frame':    15 obs. of  40 variables:
##  $ Weight_birth                      : num  1.28 1.68 2.13 1.85 1.23 2.07 1.68 1.04 1.23 1.24 ...
##  $ Weight_day_14                     : num  4.38 5.9 4.67 5.73 4.25 6.12 5.07 4 3.8 4.33 ...
##  $ Weight_at_day_26                  : num  7.9 10 7.8 NA 7.8 9.6 8.96 7.1 7.53 8.3 ...
##  $ pH_jejunum                        : num  6.06 7.1 7.05 6.84 NA 7.66 7.32 7.31 NA 7.39 ...
##  $ pH_ileum                          : num  7.11 8.4 8.03 8.14 NA 8.71 7.64 7.55 7.62 7.07 ...
##  $ pH_caecum                         : num  6.18 6.32 6.08 6.43 6.1 6.52 6.39 6.59 7.01 6.16 ...
##  $ pH_colon                          : num  6.51 6.95 7.31 5.94 6.86 5.79 6.73 6.73 6.75 6.51 ...
##  $ CTL_perc_MLN_d27                  : num  21.3 19.5 15.7 17.9 17.9 24.6 19.7 23.1 NA 16.2 ...
##  $ CTL_ki67_MLN_d27                  : num  26.4 21.4 20.2 28.1 29.2 29.3 16.1 28.5 NA 24.1 ...
##  $ MemoryT_perc_MLN_d27              : num  5.81 7.3 5.77 2.4 4.32 2.91 3.63 4.63 NA 1.07 ...
##  $ MemoryT_perc_CD4highCD8low_MLN_d27: num  43.1 73.4 54.5 76 52 55.2 46.7 75.4 NA 73.6 ...
##  $ MemoryT_Ki67_CD4highCD8low_MLN_d27: num  45.5 28.8 37 51.9 35.4 39.6 35 69.8 NA 41.1 ...
##  $ MemoryT_perc_CD4lowCD8high_MLN_d27: num  55.1 24.5 43.5 21.5 43.3 41.1 51 23 NA 23.8 ...
##  $ MemoryT_Ki67_CD4lowCD8high_MLN_d27: num  36.8 24.8 24 28.3 49.8 41.9 23.8 50.6 NA 46.8 ...
##  $ HelperT_perc_MLN_d27              : num  68.1 65.3 72.3 73.9 69.7 66.1 68.3 65.1 NA 73.9 ...
##  $ HelperT_Ki67_MLN_d27              : num  18.3 22.1 13.8 13.6 13.6 23.4 14.4 17.9 NA 22.7 ...
##  $ Tregs_perc_MLN_d27                : num  7.26 10.8 7.82 5.74 5.88 10.1 14.5 6.12 NA 12 ...
##  $ Gammadelta_perc_MLN_d27           : num  1.13 0.81 0.74 1.51 1.06 1.33 1.31 1.51 NA 1.78 ...
##  $ Gammadelta_Ki67_MLN_d27           : num  25.4 17.8 16.1 23.5 17.3 25.2 12.1 22 NA 27 ...
##  $ cdc1_perc_MLN_d27                 : num  0.41 0.17 0.085 0.86 0.3 0.15 0.22 0.38 0.54 0.41 ...
##  $ cdc1_8086_MLN_d27                 : num  62.9 78.4 75.8 57.8 103 80.9 51.4 33.4 33.4 25.7 ...
##  $ cdc2_perc_MLN_d27                 : num  0.56 0.27 0.21 1.59 0.11 0.25 0.3 0.43 0.63 0.85 ...
##  $ cdc2_8086_MLN_d27                 : num  205 187 202 116 174 209 146 146 229 158 ...
##  $ pdc_perc_MLN_d27                  : num  2.77 2.33 10.8 4.83 6.09 2.53 5.56 4.62 1.49 2.92 ...
##  $ pdc_8086_MLN_d27                  : num  127 65.5 160 142 144 130 139 138 121 133 ...
##  $ mln_restimulation_il10_ConA_5_d27 : num  2230.7 777.3 458 189.5 84.4 ...
##  $ mln_restimulation_il10_lps_10_d27 : num  21.71 13.79 5.8 9.94 12.32 ...
##  $ mln_restimulation_il10_medium_d27 : num  5.22 2.32 0.2 2.27 2.13 ...
##  $ mln_restimulation_TNF_ConA_5_d27  : num  76.6 17.4 20.5 18.8 7.8 ...
##  $ mln_restimulation_TNF_LPS_10_d27  : num  8.974 4.263 3.686 2.317 0.984 ...
##  $ mln_restimulation_TNF_medium_d27  : num  7.6 2.45 1.92 1.48 1.34 ...
##  $ Lactobacillus                     : num  0.44 0.562 0.449 0.759 0.245 ...
##  $ Streptococcus                     : num  0.00499 0.07314 0.10518 0.02061 0.06589 ...
##  $ Clostridium_sensu_stricto_1       : num  0.0895 0.0459 0.0672 0.0251 0.2587 ...
##  $ Romboutsia                        : num  0.163 0.129 0.124 0.123 0.227 ...
##  $ Terrisporobacter                  : num  0.0247 0.0761 0.0858 0.0515 0.0803 ...
##  $ Turicibacter                      : num  0.2545 0.0796 0.0594 0.0162 0.0664 ...
##  $ Veillonella                       : num  0 0.00896 0.00502 0 0.00727 ...
##  $ Actinobacillus                    : num  0 0.00114 0.01525 0.00185 0.01025 ...
##  $ Haemophilus                       : num  0.01337 0.00718 0.03706 0 0.02663 ...

11.4. Create correlation plot

# P.adjust: Calculated using Benjamini-Hockberg (BH) method
cors<-cor(my_data)
cor.mtest <- function(mat, conf.level = 0.95) {
  mat <- as.matrix(mat)
  n <- ncol(mat)
  p.mat <- lowCI.mat <- uppCI.mat <- matrix(NA, n, n)
  diag(p.mat) <- 0
  diag(lowCI.mat) <- diag(uppCI.mat) <- 1
  for (i in 1:(n - 1)) {
    for (j in (i + 1):n) {
      tmp <- cor.test(mat[, i], mat[, j], conf.level = conf.level)
      p.mat[i, j] <- p.mat[j, i] <- tmp$p.value
      lowCI.mat[i, j] <- lowCI.mat[j, i] <- tmp$conf.int[1]
      uppCI.mat[i, j] <- uppCI.mat[j, i] <- tmp$conf.int[2]
    }
  }
  return(list(p.mat, lowCI.mat, uppCI.mat))
}
res1 <- cor.mtest(my_data, 0.95)

#You can vectorize your p-value matrix (e.g. res1), which has been returned by cor.mtest function. Afterwards, p.adjust function is  used to get the adjusted p-values.
pAdj <- p.adjust(c(res1[[1]]), method = "BH")
#Then create the original matrix with adjusted p-values.
resAdj <- matrix(pAdj, ncol = dim(res1[[1]])[1])
#Then the corrplot function is used to create the plot containing adjusted p-values.
corrplot(cors, p.mat = resAdj, tl.col = "black", sig.level=0.05, insig="blank", cl.align="r", tl.cex=0.6, order="original", na.label="square", na.label.col = "white", type="lower", tl.srt=60, cl.ratio=0.1)

# Save as pdf
pdf("./figures/Correlation_plot.pdf", width=10, height=10)
corrplot(cors, p.mat = resAdj, sig.level=0.05, insig="blank", cl.align="r", tl.cex=0.6, order="original", na.label="square", na.label.col = "white", tl.col = "black", type="lower", tl.srt=60, cl.ratio=0.1)
dev.off() #The final figure can be found as Supplementary Figure S9 of the manuscript
## png 
##   2

12. pH Boxplot

Figure was generated using code below, and was then manually exported. This is why it is not showing up in the ‘figures’ folder. The unprocessed figure can be found under its individual folder.

12.1. Import and prepare data

my_data <- read.table("./input_data/pH_of_GI_segments.csv",
                      header = TRUE,
                      sep = ",",
                      stringsAsFactors = FALSE)

# Convert 'id', 'group', 'time' and 'treatment' to factors: 

my_data$id <- as.factor(my_data$id)
my_data$group <- factor(my_data$group, levels = c("jejunum", "ileum", "caecum", "colon"))
my_data$time <- as.factor(my_data$time)
my_data$treatment <- as.factor(my_data$treatment)

12.2. Create boxplot

p <- ggplot(my_data, aes(x=time, y=score, fill = factor(group))) +  
  geom_boxplot(outlier.shape = NA) + scale_fill_brewer(palette="PRGn") +   labs(fill="group")
p <- p + geom_point(position=position_jitterdodge(jitter.width = 0.2), aes(fill = factor(group), shape=treatment), alpha=0.7, size = 3) 
p
## Warning: Removed 14 rows containing non-finite values (stat_boxplot).
## Warning: Removed 14 rows containing missing values (geom_point).

13. R version and Packages used

version
##                _                           
## platform       x86_64-w64-mingw32          
## arch           x86_64                      
## os             mingw32                     
## system         x86_64, mingw32             
## status                                     
## major          3                           
## minor          6.1                         
## year           2019                        
## month          07                          
## day            05                          
## svn rev        76782                       
## language       R                           
## version.string R version 3.6.1 (2019-07-05)
## nickname       Action of the Toes
sessionInfo()
## R version 3.6.1 (2019-07-05)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19041)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_Netherlands.1252  LC_CTYPE=English_Netherlands.1252   
## [3] LC_MONETARY=English_Netherlands.1252 LC_NUMERIC=C                        
## [5] LC_TIME=English_Netherlands.1252    
## 
## attached base packages:
##  [1] splines   parallel  stats4    stats     graphics  grDevices utils    
##  [8] datasets  methods   base     
## 
## other attached packages:
##  [1] moments_0.14                gamlss_5.1-7               
##  [3] gamlss.dist_5.1-7           gamlss.data_5.1-4          
##  [5] datarium_0.1.0              rstatix_0.6.0              
##  [7] forcats_0.5.0               stringr_1.4.0              
##  [9] readr_1.3.1                 tidyr_1.1.1                
## [11] tibble_3.0.3                tidyverse_1.3.0            
## [13] corrplot_0.84               metamicrobiomeR_1.1        
## [15] ggvegan_0.1-0               psych_2.0.7                
## [17] DESeq2_1.26.0               SummarizedExperiment_1.16.1
## [19] DelayedArray_0.12.3         BiocParallel_1.20.1        
## [21] matrixStats_0.56.0          Biobase_2.46.0             
## [23] GenomicRanges_1.38.0        GenomeInfoDb_1.22.1        
## [25] IRanges_2.20.2              S4Vectors_0.24.4           
## [27] BiocGenerics_0.32.0         DT_0.13                    
## [29] propr_4.2.6                 CoDaSeq_0.99.6             
## [31] car_3.0-8                   carData_3.0-4              
## [33] zCompositions_1.3.4         truncnorm_1.0-8            
## [35] NADA_1.6-1.1                compositions_2.0-0         
## [37] ALDEx2_1.18.0               RColorBrewer_1.1-2         
## [39] viridis_0.5.1               viridisLite_0.3.0          
## [41] purrr_0.3.4                 data.table_1.12.8          
## [43] multcompView_0.1-8          multcomp_1.4-13            
## [45] TH.data_1.0-10              MASS_7.3-51.4              
## [47] mvtnorm_1.1-1               emmeans_1.4.8              
## [49] picante_1.8.2               vegan_2.5-6                
## [51] permute_0.9-5               Hmisc_4.4-0                
## [53] Formula_1.2-3               survival_3.1-12            
## [55] lattice_0.20-38             decontam_1.6.0             
## [57] pheatmap_1.0.12             apeglm_1.8.0               
## [59] sciplot_1.2-0               lme4_1.1-23                
## [61] Matrix_1.2-17               nlme_3.1-140               
## [63] ggrepel_0.8.2               knitr_1.29                 
## [65] scales_1.1.1                reshape2_1.4.4             
## [67] ape_5.4                     dplyr_1.0.1                
## [69] plyr_1.8.6                  ggpubr_0.4.0               
## [71] microbiomeutilities_0.99.02 microbiome_2.1.26          
## [73] ggplot2_3.3.2               phyloseq_1.30.0            
## 
## loaded via a namespace (and not attached):
##   [1] utf8_1.1.4             tidyselect_1.1.0       RSQLite_2.2.0         
##   [4] AnnotationDbi_1.48.0   htmlwidgets_1.5.1      grid_3.6.1            
##   [7] Rtsne_0.15             munsell_0.5.0          codetools_0.2-16      
##  [10] statmod_1.4.34         withr_2.2.0            colorspace_1.4-1      
##  [13] rstudioapi_0.11        robustbase_0.93-6      bayesm_3.1-4          
##  [16] ggsignif_0.6.0         labeling_0.3           bbmle_1.0.23.1        
##  [19] GenomeInfoDbData_1.2.2 mnormt_1.5-7           farver_2.0.3          
##  [22] bit64_4.0.2            rhdf5_2.30.1           coda_0.19-3           
##  [25] vctrs_0.3.2            generics_0.0.2         xfun_0.16             
##  [28] R6_2.4.1               locfit_1.5-9.4         bitops_1.0-6          
##  [31] assertthat_0.2.1       nnet_7.3-12            gtable_0.3.0          
##  [34] sandwich_2.5-1         rlang_0.4.7            genefilter_1.68.0     
##  [37] acepack_1.4.1          broom_0.7.0            checkmate_2.0.0       
##  [40] modelr_0.1.8           yaml_2.2.1             abind_1.4-5           
##  [43] backports_1.1.8        tensorA_0.36.1         tools_3.6.1           
##  [46] ellipsis_0.3.1         biomformat_1.14.0      Rcpp_1.0.5            
##  [49] base64enc_0.1-3        zlibbioc_1.32.0        RCurl_1.98-1.2        
##  [52] rpart_4.1-15           zoo_1.8-8              haven_2.3.1           
##  [55] cluster_2.1.0          fs_1.4.1               magrittr_1.5          
##  [58] openxlsx_4.1.5         reprex_0.3.0           hms_0.5.3             
##  [61] evaluate_0.14          xtable_1.8-4           XML_3.99-0.3          
##  [64] rio_0.5.16             emdbook_1.3.12         jpeg_0.1-8.1          
##  [67] readxl_1.3.1           gridExtra_2.3          compiler_3.6.1        
##  [70] bdsmatrix_1.3-4        crayon_1.3.4           minqa_1.2.4           
##  [73] htmltools_0.5.0        mgcv_1.8-28            geneplotter_1.64.0    
##  [76] lubridate_1.7.9        DBI_1.1.0              dbplyr_1.4.4          
##  [79] boot_1.3-22            ade4_1.7-15            cli_2.0.2             
##  [82] gdata_2.18.0           igraph_1.2.5           pkgconfig_2.0.3       
##  [85] numDeriv_2016.8-1.1    foreign_0.8-71         xml2_1.3.2            
##  [88] foreach_1.5.0          annotate_1.64.0        multtest_2.42.0       
##  [91] XVector_0.26.0         estimability_1.3       rvest_0.3.6           
##  [94] digest_0.6.25          Biostrings_2.54.0      rmarkdown_2.3         
##  [97] cellranger_1.1.0       htmlTable_2.0.1        curl_4.3              
## [100] gtools_3.8.2           nloptr_1.2.2.1         lifecycle_0.2.0       
## [103] jsonlite_1.6.1         Rhdf5lib_1.8.0         fansi_0.4.1           
## [106] pillar_1.4.6           httr_1.4.2             DEoptimR_1.0-8        
## [109] glue_1.4.1             zip_2.0.4              png_0.1-7             
## [112] iterators_1.0.12       bit_4.0.4              stringi_1.4.6         
## [115] blob_1.2.1             latticeExtra_0.6-29    memoise_1.1.0