This HTML document contains the R-code that was used to perform analyses and create figures for the corresponding manuscript. At the end of the document the R version and package versions can be found. Should one want to run this R project in R-studio, it is advised to use the same R version and package versions as have been used here.
#Warnings and messages have been inactivated for this markdown
library(phyloseq)
library(microbiome)
library(microbiomeutilities)
library(ggplot2)
library(ggpubr)
library(plyr)
library(dplyr)
library(ape)
library(reshape2)
library(scales)
library(knitr)
library(ggrepel)
library(nlme)
library(lme4)
library(sciplot)
library(apeglm)
library(pheatmap)
library(decontam)
library(Hmisc)
library(picante)
library(emmeans)
library(multcomp)
library(multcompView)
library(data.table)
library(purrr)
library(viridis)
library(RColorBrewer)
library(ALDEx2)
library(compositions)
library(zCompositions)
library(CoDaSeq)
library(propr)
library(vegan)
library(DT)
library(DESeq2)
library(psych)
library(car)
library(ggvegan)
library(metamicrobiomeR)
library(corrplot)
library(tidyverse)
library(rstatix)
library(datarium)
# Create Folders as following
# Figures
dir.create("figures")
# Phyloseq objects
dir.create("phyobjects")
# Phyloseq objects
dir.create("output_data")
Loading Illumina sequencing data of library 1-14. Data was run through NGTax 2.0 and SILVA database version 132 was used as reference database.
Create phyloseq object
# create phyloseq object of all libraries
ps <- read_phyloseq(otu.file = "./input_data/Galaxy58-[NG-Tax__porcine_study_all_samples_eightrun_length70bp_11march2020].biom1",
taxonomy.file = NULL,
metadata.file = "./input_data/Metadata_porcine_study_june_2020_y_glucan.csv",
type = "biom")
## Time to complete depends on OTU file size
# create tree object
treefile <- read.tree("./input_data/all_otus_eightrun_length70bp_11march2020.tre")
# merge tree and phyloseq object
ps1.all <- merge_phyloseq(ps,treefile)
print(ps1.all)
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 4157 taxa and 482 samples ]
## sample_data() Sample Data: [ 482 samples by 115 sample variables ]
## tax_table() Taxonomy Table: [ 4157 taxa by 6 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 4157 tips and 4156 internal nodes ]
# export taxonomy table (for Windows)
taxonomy_table_ps1 <- as.data.frame(ps1.all@tax_table)
write.csv(taxonomy_table_ps1, file = "./output_data/taxonomy_table_ps1.csv", fileEncoding = "UTF-16LE")
ntaxa(ps1.all)
## [1] 4157
Remove pattern in taxa names
# remove pattern in taxa names
tax_table(ps1.all)[,colnames(tax_table(ps1.all))] <- gsub(tax_table(ps1.all)[,colnames(tax_table(ps1.all))],pattern="[a-z]__",replacement="")
Export phyloseq object
# this is the raw, unfiltered phyloseq file
saveRDS(ps1.all, "./phyobjects/ps1_all.rds")
ps1.all <- readRDS("./phyobjects/ps1_all.rds")
# create new object to work with
ps1 <- ps1.all
Removing mitochondrial and chloroplast reads
# replace empty fields by "Unmatched_level"
# this is needed because otherwise the microbiome::aggregate_taxa() function will not run
tax_table(ps1)[tax_table(ps1)[,"Phylum"]== "","Phylum"] <- "Unmatched_phylum"
tax_table(ps1)[tax_table(ps1)[,"Class"]== "","Class"] <- "Unmatched_class"
tax_table(ps1)[tax_table(ps1)[,"Order"]== "","Order"] <- "Unmatched_order"
tax_table(ps1)[tax_table(ps1)[,"Family"]== "","Family"] <- "Unmatched_family"
tax_table(ps1)[tax_table(ps1)[,"Genus"]== "","Genus"] <- "Unmatched_genus"
# remove mitochondria and chloroplasts
ps1 <- subset_taxa(ps1, Family != "Mitochondria")
ps1 <- subset_taxa(ps1, Class != "Chloroplast") #No Chloroplasts found in class Chloroplasts
ps1.mitoch <- subset_taxa(ps1.all, Family == "Mitochondria")
ps1.mitoch <- prune_samples(sample_sums(ps1.mitoch)>0,ps1.mitoch)
sort(sample_sums(ps1.mitoch))
## m2195 m3035 m1338 m1315 m3041 m2199 m3073 m1832 m3012 m3032 m1318 m3022 m3016
## 163 188 198 202 242 248 278 464 829 1440 2110 2603 3542
## m1339 m3037 m3042 m3031 m3040 m1329 m3015 m2194 m3011
## 4010 4951 5456 6973 9682 15198 21652 33679 34331
# There are however Oxyphotobacteria (Dutch translation: blauwalgen) present in the dataset, which are not removed from the dataset:
ps1.chloro <- subset_taxa(ps1.all, Order == "Chloroplast")
ps1.chloro <- prune_samples(sample_sums(ps1.chloro)>0,ps1.chloro)
sort(sample_sums(ps1.chloro))
## water.blank.9 water.blank.13 m2209 m2199 m2205
## 2 36 103 142 298
## m1398 m2183 m1832 m3035 m1317
## 309 333 446 483 517
## m1356 m1333 m3041 m3026 m3073
## 546 607 700 725 726
## m3020 m2235 m2232 m1318 m3016
## 956 1070 1208 2832 3218
## m3042 m3012 m3032 m1339 m3022
## 3448 4065 5062 5465 7845
## m3011 m3031 m3015 m3040 m3037
## 8747 11367 11572 11760 18781
## m1329 m2194
## 46379 49513
print(ps1)
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 4123 taxa and 482 samples ]
## sample_data() Sample Data: [ 482 samples by 115 sample variables ]
## tax_table() Taxonomy Table: [ 4123 taxa by 6 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 4123 tips and 4122 internal nodes ]
ntaxa(ps1.all)-ntaxa(ps1) # 34 ASVs removed which were belonging to mitochondria
## [1] 34
Remove contaminant reads, identified by visual inspection of each ASV individually using correlation plots [DNA_reading] vs rel. ASV abundance. Resulted in 8 ASVs identified as contaminants.
# remove samples that have no reads. Does not do anything as in this dataset there are reads in every sample:
psx <- prune_samples(sample_sums(ps1)>0,ps1)
# add higher taxa names
psx.tax <- as.data.frame(psx@tax_table)
psx.tax$OTU <- rownames(psx.tax)
# import contaminant OTU table
visContam <- read.delim("./input_data/Contaminant_OTU_by_visual_identification.txt",header=T)
psx.tax2 <- psx.tax
rownames(psx.tax2) <- NULL # reset rownames for subset based on index
psx.tax2$contam <- rownames(psx.tax2) %in% visContam$OTU
table(psx.tax2$contam) # works: 8 OTUs T, rest F
##
## FALSE TRUE
## 4115 8
rownames(psx.tax2) <- psx.tax2$OTU # restore OTU as rownames
psx.tax2$reads <- taxa_sums(psx)
# calculate % contaminant reads
contam.otu2 <- subset(psx.tax2, contam == "TRUE")
(sum(contam.otu2$reads) / sum(abundances(psx)))
## [1] 9.182982e-05
# 0.0092% of all reads
# subset phyloseq to contaminant OTUs
psx.sub <- subset_taxa(psx, psx.tax2$contam)
ps1.decontam <- prune_taxa(!psx.tax2$contam, ps1)
ps1.decontam
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 4115 taxa and 482 samples ]
## sample_data() Sample Data: [ 482 samples by 115 sample variables ]
## tax_table() Taxonomy Table: [ 4115 taxa by 6 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 4115 tips and 4114 internal nodes ]
ps1.decontam <- prune_samples(sample_sums(ps1.decontam)>0, ps1.decontam)
ps1.decontam # 4115 taxa, 482 samples
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 4115 taxa and 481 samples ]
## sample_data() Sample Data: [ 481 samples by 115 sample variables ]
## tax_table() Taxonomy Table: [ 4115 taxa by 6 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 4115 tips and 4114 internal nodes ]
# samples with 0 reads after removing contaminants: 1 (water.blank.10)
# (code in following 2 lines only runs if not running prune_samples command above, used to identify removed samples)
#sample0 <- subset_samples(ps1.decontam, sample_sums(ps1.decontam)==0)
#sample_data(sample0)
ntaxa(ps1)-ntaxa(ps1.decontam)
## [1] 8
# 8 OTUs removed (resulting from the visual identification of contaminants)
sum(abundances(ps1)) - sum(abundances(ps1.decontam))
## [1] 8244
# 8244 reads removed.
# dataset without mitochondrial/chloroplast reads, but with contaminants
saveRDS(ps1, "./phyobjects/ps1.rds")
ps1 <- readRDS("./phyobjects/ps1.rds")
# decontaminated dataset
saveRDS(ps1.decontam, "./phyobjects/ps1.decontam.rds")
ps1.decontam <- readRDS("./phyobjects/ps1.decontam.rds")
# all following objects are without contaminant ASVs:
ps1.unique <- subset_samples(ps1.decontam, Unique == "yes")
ps1.noROM <- subset_samples(ps1.unique, Treatment != "ROM")
ps1.glucan <- subset_samples(ps1.noROM, Treatment != "EcN")
ps1.glucan <- prune_taxa(taxa_sums(otu_table(ps1.glucan))>0, ps1.glucan)
ps1.faeces <- subset_samples(ps1.glucan, Origin == "faeces")
ps1.faeces <- prune_taxa(taxa_sums(otu_table(ps1.faeces))>0, ps1.faeces)
ps1.faeces.pre <- subset_samples(ps1.faeces, Pre_or_post_weaning == "Pre_weaning")
ps1.faeces.pre <- prune_taxa(taxa_sums(otu_table(ps1.faeces.pre))>0, ps1.faeces.pre)
ps1.faeces.post <- subset_samples(ps1.faeces, Pre_or_post_weaning == "Post_weaning")
ps1.faeces.post <- prune_taxa(taxa_sums(otu_table(ps1.faeces.post))>0, ps1.faeces.post)
ps1.digesta <- subset_samples(ps1.glucan, Origin %in% c("jejunum_digesta", "ileum_digesta", "caecum_digesta"))
ps1.digesta <- prune_taxa(taxa_sums(otu_table(ps1.digesta))>0, ps1.digesta)
ps1.jejunum.digesta <- subset_samples(ps1.glucan, Origin %in% c("jejunum_digesta"))
ps1.jejunum.digesta <- prune_taxa(taxa_sums(otu_table(ps1.jejunum.digesta))>0, ps1.jejunum.digesta)
ps1.ileum.digesta <- subset_samples(ps1.glucan, Origin %in% c("ileum_digesta"))
ps1.ileum.digesta <- prune_taxa(taxa_sums(otu_table(ps1.ileum.digesta))>0, ps1.ileum.digesta)
ps1.caecum.digesta <- subset_samples(ps1.glucan, Origin %in% c("caecum_digesta"))
ps1.caecum.digesta <- prune_taxa(taxa_sums(otu_table(ps1.caecum.digesta))>0, ps1.caecum.digesta)
ps1.swabs <- subset_samples(ps1.faeces, Swab %in% c("swab"))
ps1.swabs <- prune_taxa(taxa_sums(otu_table(ps1.swabs))>0, ps1.swabs)
ps1.no.swabs <- subset_samples(ps1.faeces, Swab %in% c("no_swab"))
ps1.no.swabs <- prune_taxa(taxa_sums(otu_table(ps1.no.swabs))>0, ps1.no.swabs)
print(ps1.faeces) # 287 samples
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 2292 taxa and 287 samples ]
## sample_data() Sample Data: [ 287 samples by 115 sample variables ]
## tax_table() Taxonomy Table: [ 2292 taxa by 6 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 2292 tips and 2291 internal nodes ]
# filter out archaea and place in phyloseq object
ps1.archaea <- subset_taxa(ps1.glucan, Phylum == "Euryarchaeota")
ps1.archaea
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 22 taxa and 421 samples ]
## sample_data() Sample Data: [ 421 samples by 115 sample variables ]
## tax_table() Taxonomy Table: [ 22 taxa by 6 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 22 tips and 21 internal nodes ]
# negative controls - includes contaminant reads (ps1)
ps1.contr <- subset_samples(ps1, Origin %in% c("blank"))
ps1.contr <- prune_taxa(taxa_sums(otu_table(ps1.contr))>0, ps1.contr)
print(ps1.contr)
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 202 taxa and 14 samples ]
## sample_data() Sample Data: [ 14 samples by 115 sample variables ]
## tax_table() Taxonomy Table: [ 202 taxa by 6 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 202 tips and 201 internal nodes ]
# positive controls (mocks) - with contaminant reads (ps1)
ps1.mock <- subset_samples(ps1, Origin %in% c("mock.3","mock.4"))
ps1.mock <- prune_taxa(taxa_sums(otu_table(ps1.mock))>0, ps1.mock)
print(ps1.mock)
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 115 taxa and 28 samples ]
## sample_data() Sample Data: [ 28 samples by 115 sample variables ]
## tax_table() Taxonomy Table: [ 115 taxa by 6 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 115 tips and 114 internal nodes ]
# technical duplicates (PCR) - includes contaminant reads (ps1)
ps1.tech <- subset_samples(ps1, Description %in% c("m1858", "m1858R1","m1858R2","m1858R3", "m2158", "m2158R1", "m2158R2", "m2158R3", "m169", "m169R1", "m169R2", "m169R3", "m169R4", "m169R5", "m169R6", "m924", "m924R1", "m924R2", "m924R3", "m924R4", "m924R5", "m924R6"))
ps1.tech <- prune_taxa(taxa_sums(otu_table(ps1.tech))>0, ps1.tech)
print(ps1.tech)
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 345 taxa and 22 samples ]
## sample_data() Sample Data: [ 22 samples by 115 sample variables ]
## tax_table() Taxonomy Table: [ 345 taxa by 6 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 345 tips and 344 internal nodes ]
# subset mitochondrial sequences
ps1.mit <- subset_taxa(ps1.all, Family == "Mitochondria")
print(ps1.mit)
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 34 taxa and 482 samples ]
## sample_data() Sample Data: [ 482 samples by 115 sample variables ]
## tax_table() Taxonomy Table: [ 34 taxa by 6 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 34 tips and 33 internal nodes ]
# subset chloroplast sequences
#ps1.chloro <- subset_taxa(ps1.all, Class == "Chloroplast") #does not run, since there do not seem to be chloroplast reads present
Saving subsets in compressed formats (RDS).
# with contaminants
saveRDS(ps1.decontam, "./phyobjects/ps1.decontam.rds")
saveRDS(ps1.unique, "./phyobjects/ps1.unique.rds")
saveRDS(ps1.all, "./phyobjects/ps1.all.rds")
saveRDS(ps1.tech, "./phyobjects/ps1.tech.rds")
saveRDS(ps1.contr, "./phyobjects/ps1.contr.rds")
saveRDS(ps1.mock, "./phyobjects/ps1.mock.rds")
saveRDS(ps1.mit, "./phyobjects/ps1.mit.rds")
saveRDS(ps1.faeces, "./phyobjects/ps1.faeces.rds")
saveRDS(ps1.faeces.pre, "./phyobjects/ps1.faeces.pre.rds")
saveRDS(ps1.faeces.post, "./phyobjects/ps1.faeces.post.rds")
saveRDS(ps1.glucan, "./phyobjects/ps1.glucan.rds")
saveRDS(ps1.digesta, "./phyobjects/ps1.digesta.rds")
saveRDS(ps1.jejunum.digesta, "./phyobjects/ps1.jejunum.digesta.rds")
saveRDS(ps1.ileum.digesta, "./phyobjects/ps1.ileum.digesta.rds")
saveRDS(ps1.caecum.digesta, "./phyobjects/ps1.caecum.digesta.rds")
saveRDS(ps1.swabs, "./phyobjects/ps1.swabs.rds")
saveRDS(ps1.no.swabs, "./phyobjects/ps1.no.swabs.rds")
saveRDS(ps1.archaea, "./phyobjects/ps1.archaea.rds")
# load full datasets from RDS
ps1.all <- readRDS("./phyobjects/ps1.all.rds")
Exploring the stats of the datasets: # reads, ASVs, nr of genera, etc.
sum(sample_sums(ps1.all)) # raw phyloseq
## [1] 89923196
sum(sample_sums(ps1)) # excl mitrochondria/chloroplast, incl contaminants
## [1] 89774757
sum(sample_sums(ps1.decontam)) # excl mitrochondria/chloroplast, excl contaminants
## [1] 89766513
print(ps1)
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 4123 taxa and 482 samples ]
## sample_data() Sample Data: [ 482 samples by 115 sample variables ]
## tax_table() Taxonomy Table: [ 4123 taxa by 6 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 4123 tips and 4122 internal nodes ]
ntaxa(aggregate_taxa(ps1,"Genus"))
## [1] 429
colnames(tax_table(ps1))
## [1] "Domain" "Phylum" "Class" "Order" "Family" "Genus"
ps1.faeces <- readRDS("./phyobjects/ps1.faeces.rds") #all faecal samples
# this line is used to make sure the order of days is chronological:
ps1.faeces@sam_data$Day_of_study_factor <- factor(ps1.faeces@sam_data$Day_of_study_factor, levels = c("Day_4", "Day_8", "Day_14", "Day_26", "Day_27", "Day_35", "Day_43", "Day_44", "Day_59", "Day_69", "Day_70"))
# this line is used to make sure pre-weaning is followed by post-weaning:
ps1.faeces@sam_data$Pre_or_post_weaning <- factor(ps1.faeces@sam_data$Pre_or_post_weaning, levels = c("Pre-weaning", "Post-weaning"))
ps1.digesta <- readRDS("./phyobjects/ps1.digesta.rds") #all digesta samples
# this line is used to reorder segments according order luminal content passages through the GI tract of the animals:
ps1.digesta@sam_data$Origin <- factor(ps1.digesta@sam_data$Origin, levels = c("jejunum_digesta", "ileum_digesta", "caecum_digesta"))
print(ps1.digesta) #contains 1608 taxa in 134 samples
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 1608 taxa and 134 samples ]
## sample_data() Sample Data: [ 134 samples by 115 sample variables ]
## tax_table() Taxonomy Table: [ 1608 taxa by 6 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 1608 tips and 1607 internal nodes ]
ps1.glucan <- readRDS("./phyobjects/ps1.glucan.rds") #all faecal and digestal samples of the current study
# this line is used to combine faeces and digesta in an orderly fashion for use in the alpha-diversity plots:
ps1.glucan@sam_data$Origin_Day <- factor(ps1.glucan@sam_data$Origin_Day, levels = c("faeces_Day_4", "faeces_Day_8", "faeces_Day_14", "faeces_Day_26", "faeces_Day_35", "faeces_Day_43", "faeces_Day_59", "faeces_Day_69", "jejunum_digesta_Day_27", "jejunum_digesta_Day_44", "jejunum_digesta_Day_70", "ileum_digesta_Day_27", "ileum_digesta_Day_44", "ileum_digesta_Day_70", "caecum_digesta_Day_27", "caecum_digesta_Day_44", "caecum_digesta_Day_70"))
p <- plot_richness(ps1.faeces, "Day_of_study_factor", color = "Treatment", measures = "Shannon")
## Warning in estimate_richness(physeq, split = TRUE, measures = measures): The data you have provided does not have
## any singletons. This is highly suspicious. Results of richness
## estimates (for example) are probably unreliable, or wrong, if you have already
## trimmed low-abundance taxa from the data.
##
## We recommended that you find the un-trimmed data and retry.
p <- p + geom_boxplot(outlier.shape = NA, aes(fill = "Day_of_study")) + scale_fill_manual(values = c("white", "#5F7FC7", "orange","#DA5724", "#508578"))
p <- p + geom_point(position=position_jitterdodge(), alpha=1, size=1.5) #+ theme_bw()
p$layers <- p$layers[-1]
print (p) #this plot is created for illustrative purposes, and is not saved or used in the manuscript
p <- plot_richness(ps1.digesta, "Day_of_study_factor", color = "Treatment", measures = "Shannon")
## Warning in estimate_richness(physeq, split = TRUE, measures = measures): The data you have provided does not have
## any singletons. This is highly suspicious. Results of richness
## estimates (for example) are probably unreliable, or wrong, if you have already
## trimmed low-abundance taxa from the data.
##
## We recommended that you find the un-trimmed data and retry.
p <- p + geom_boxplot(outlier.shape = NA, aes(fill = "Day_of_study")) + scale_fill_manual(values = c("white", "#5F7FC7", "orange","#DA5724", "#508578"))
p <- p + geom_point(position=position_jitterdodge(), alpha=1, size=1.5)
p <- p + facet_wrap("Origin")
p$layers <- p$layers[-1]
print (p) #this plot is created for illustrative purposes, and is not saved or used in the manuscript
# following figures combine the faeces and digesta alpha diversities
p <- plot_richness(ps1.glucan, "Origin_Day", color = "Treatment", measures = "Shannon")
## Warning in estimate_richness(physeq, split = TRUE, measures = measures): The data you have provided does not have
## any singletons. This is highly suspicious. Results of richness
## estimates (for example) are probably unreliable, or wrong, if you have already
## trimmed low-abundance taxa from the data.
##
## We recommended that you find the un-trimmed data and retry.
p <- p + geom_boxplot(outlier.shape = NA, aes(fill = "Origin_Day")) + scale_fill_manual(values = c("white", "#5F7FC7", "orange","#DA5724", "#508578"))
p <- p + geom_point(position=position_jitterdodge(), alpha = .9, size=2)
p$layers <- p$layers[-1]
#The final figure can be found under panel B in Figure 2 of the manuscript
print(p)
ggsave("./figures/Alpha_diversity_Shannon.pdf", height = 7, width = 11)
p <- plot_richness(ps1.glucan, "Origin_Day", color = "Treatment", measures = "InvSimpson")
## Warning in estimate_richness(physeq, split = TRUE, measures = measures): The data you have provided does not have
## any singletons. This is highly suspicious. Results of richness
## estimates (for example) are probably unreliable, or wrong, if you have already
## trimmed low-abundance taxa from the data.
##
## We recommended that you find the un-trimmed data and retry.
p <- p + geom_boxplot(outlier.shape = NA, aes(fill = "Origin_Day")) + scale_fill_manual(values = c("white", "#5F7FC7", "orange","#DA5724", "#508578"))
p <- p + geom_point(position=position_jitterdodge(), alpha = .9, size=2)
p$layers <- p$layers[-1]
#The final figure can be found under panel A in Figure 2 of the manuscript
print(p)
ggsave("./figures/Alpha_diversity_InvSimpson.pdf", height = 7, width = 11)
Loading data for LME (Linear Mixed-Effects models)
ps1 <- readRDS("./phyobjects/ps1.glucan.rds")
ps1@sam_data$Day_of_study_factor <- factor(ps1@sam_data$Day_of_study_factor, levels = c("Day_4", "Day_8", "Day_14", "Day_26", "Day_27", "Day_35", "Day_43", "Day_44", "Day_59", "Day_69", "Day_70"))
ps1@sam_data$Pre_or_post_weaning <- factor(ps1@sam_data$Pre_or_post_weaning, levels = c("Pre_weaning", "Post_weaning"))
#ps1 <- tax_glom(ps1, "Genus") One can aggregate ASVs to genus level to facilitate genus-level analysis. It was however chosen to work with ASV-level data for alpha diversity analyses so this line is not used.
ps1.fcs <- subset_samples(ps1, Origin %in% c("faeces"))
ps1.fcs.pre <- subset_samples(ps1.fcs, Pre_or_post_weaning %in% c("Pre_weaning"))
ps1.jej <- subset_samples(ps1, Origin %in% c("jejunum_digesta"))
ps1.ile <- subset_samples(ps1, Origin %in% c("ileum_digesta"))
ps1.cae <- subset_samples(ps1, Origin %in% c("caecum_digesta"))
ps1.fcs <- prune_taxa(taxa_sums(otu_table(ps1.fcs))>0, ps1.fcs)
ps1.fcs.pre <- prune_taxa(taxa_sums(otu_table(ps1.fcs.pre))>0, ps1.fcs.pre)
ps1.jej <- prune_taxa(taxa_sums(otu_table(ps1.jej))>0, ps1.jej)
ps1.ile <- prune_taxa(taxa_sums(otu_table(ps1.ile))>0, ps1.ile)
ps1.cae <- prune_taxa(taxa_sums(otu_table(ps1.cae))>0, ps1.cae)
ps1.fcs.r <- microbiome::transform(ps1.fcs, "compositional")
ps1.fcs.pre.r <- microbiome::transform(ps1.fcs.pre, "compositional")
ps1.jej.r <- microbiome::transform(ps1.jej, "compositional")
ps1.ile.r <- microbiome::transform(ps1.ile, "compositional")
ps1.cae.r <- microbiome::transform(ps1.cae, "compositional")
# input for alpha diversity
ps1.otu <- as.data.frame(ps1.fcs.r@otu_table)
ps1.tree <- ps1.fcs.r@phy_tree
ps1.pre.otu <- as.data.frame(ps1.fcs.pre.r@otu_table)
ps1.pre.tree <- ps1.fcs.pre.r@phy_tree
ps1.jej.otu <- as.data.frame(ps1.jej.r@otu_table)
ps1.jej.tree <- ps1.jej.r@phy_tree
ps1.ile.otu <- as.data.frame(ps1.ile.r@otu_table)
ps1.ile.tree <- ps1.ile.r@phy_tree
ps1.cae.otu <- as.data.frame(ps1.cae.r@otu_table)
ps1.cae.tree <- ps1.cae.r@phy_tree
# calculate Shannon and InvSimpson alpha diversity and add metadata
ad.df <- data.frame("Description" = colnames(ps1.otu))
ad.df$Shannon <- estimate_richness(ps1.fcs)$Shannon
## Warning in estimate_richness(ps1.fcs): The data you have provided does not have
## any singletons. This is highly suspicious. Results of richness
## estimates (for example) are probably unreliable, or wrong, if you have already
## trimmed low-abundance taxa from the data.
##
## We recommended that you find the un-trimmed data and retry.
ad.df$InvSimpson <- estimate_richness(ps1.fcs)$InvSimpson
## Warning in estimate_richness(ps1.fcs): The data you have provided does not have
## any singletons. This is highly suspicious. Results of richness
## estimates (for example) are probably unreliable, or wrong, if you have already
## trimmed low-abundance taxa from the data.
##
## We recommended that you find the un-trimmed data and retry.
ad.df <- merge(meta(ps1.fcs.r), ad.df, by = "Description", all.x=T)
ad.df <- ad.df
# mixed model: random term and variance structure
lme.pd1 <- lme(Shannon ~ Day_of_study_factor * Treatment,
data = ad.df, method = "REML",
random = ~ 1 | Ear_tag)
lme.pd2 <- lme(InvSimpson ~ Day_of_study_factor * Treatment,
data = ad.df, method = "REML",
random = ~ 1 | Ear_tag)
# residual plots to visually inspect normality assumption
plot(resid(lme.pd1, method= "pearson")~ad.df$Day_of_study_factor); abline(0,0)
hist(resid(lme.pd1, method= "pearson"), breaks=30, col="grey")
qqnorm(lme.pd1, ∼ranef (.))
qqnorm(lme.pd1)
plot(resid(lme.pd2, method= "pearson")~ad.df$Day_of_study_factor); abline(0,0)
hist(resid(lme.pd2, method= "pearson"), breaks=30, col="grey")
qqnorm(lme.pd2, ∼ranef (.))
qqnorm(lme.pd2)
# model output
aov.pd.ls1 <- anova(lme.pd1)
aov.pd.ls1
## numDF denDF F-value p-value
## (Intercept) 1 225 23680.734 <.0001
## Day_of_study_factor 7 225 26.781 <.0001
## Treatment 1 46 2.601 0.1136
## Day_of_study_factor:Treatment 7 225 1.465 0.1807
aov.pd.ls2 <- anova(lme.pd2)
aov.pd.ls2
## numDF denDF F-value p-value
## (Intercept) 1 225 1207.4831 <.0001
## Day_of_study_factor 7 225 14.0901 <.0001
## Treatment 1 46 0.2810 0.5986
## Day_of_study_factor:Treatment 7 225 0.9598 0.4615
# test differences between timepoints - Shannon diversity
res.aov2 <- aov(Shannon ~ Day_of_study_factor, data = ad.df)
summary(res.aov2)
## Df Sum Sq Mean Sq F value Pr(>F)
## Day_of_study_factor 7 30.24 4.320 26.32 <2e-16 ***
## Residuals 279 45.79 0.164
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(res.aov2, which = "Day_of_study_factor")
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Shannon ~ Day_of_study_factor, data = ad.df)
##
## $Day_of_study_factor
## diff lwr upr p adj
## Day_8-Day_4 0.44294102 0.19039591 0.6954861 0.0000049
## Day_14-Day_4 0.56482556 0.31228045 0.8173707 0.0000000
## Day_26-Day_4 0.78572442 0.53183953 1.0396093 0.0000000
## Day_35-Day_4 0.99917659 0.71682258 1.2815306 0.0000000
## Day_43-Day_4 0.81884184 0.53648782 1.1011959 0.0000000
## Day_59-Day_4 0.90620138 0.54904866 1.2633541 0.0000000
## Day_69-Day_4 0.95042359 0.59327087 1.3075763 0.0000000
## Day_14-Day_8 0.12188454 -0.13066057 0.3744297 0.8207226
## Day_26-Day_8 0.34278340 0.08889851 0.5966683 0.0012643
## Day_35-Day_8 0.55623558 0.27388156 0.8385896 0.0000002
## Day_43-Day_8 0.37590082 0.09354680 0.6582548 0.0015870
## Day_59-Day_8 0.46326036 0.10610764 0.8204131 0.0023791
## Day_69-Day_8 0.50748257 0.15032985 0.8646353 0.0005240
## Day_26-Day_14 0.22089886 -0.03298602 0.4747837 0.1404516
## Day_35-Day_14 0.43435104 0.15199702 0.7167051 0.0001110
## Day_43-Day_14 0.25401628 -0.02833774 0.5363703 0.1130279
## Day_59-Day_14 0.34137582 -0.01577690 0.6985285 0.0726414
## Day_69-Day_14 0.38559803 0.02844531 0.7427508 0.0241359
## Day_35-Day_26 0.21345218 -0.07010080 0.4970052 0.2978287
## Day_43-Day_26 0.03311742 -0.25043556 0.3166704 0.9999645
## Day_59-Day_26 0.12047696 -0.23762437 0.4785783 0.9699513
## Day_69-Day_26 0.16469917 -0.19340216 0.5228005 0.8546363
## Day_43-Day_35 -0.18033476 -0.48963809 0.1289686 0.6337445
## Day_59-Day_35 -0.09297522 -0.47179289 0.2858425 0.9953216
## Day_69-Day_35 -0.04875301 -0.42757067 0.3300647 0.9999314
## Day_59-Day_43 0.08735954 -0.29145813 0.4661772 0.9968286
## Day_69-Day_43 0.13158175 -0.24723592 0.5103994 0.9641856
## Day_69-Day_59 0.04422221 -0.39319875 0.4816432 0.9999868
# test differences between timepoints - InvSimpson diversity
res.aov2 <- aov(InvSimpson ~ Day_of_study_factor, data = ad.df)
summary(res.aov2)
## Df Sum Sq Mean Sq F value Pr(>F)
## Day_of_study_factor 7 11186 1598.0 14.08 1.24e-15 ***
## Residuals 279 31674 113.5
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(res.aov2, which = "Day_of_study_factor")
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = InvSimpson ~ Day_of_study_factor, data = ad.df)
##
## $Day_of_study_factor
## diff lwr upr p adj
## Day_8-Day_4 4.7395735 -1.9027891 11.381936 0.3675634
## Day_14-Day_4 7.3851438 0.7427813 14.027506 0.0176412
## Day_26-Day_4 15.1672396 8.4896388 21.844840 0.0000000
## Day_35-Day_4 19.0202808 11.5938937 26.446668 0.0000000
## Day_43-Day_4 11.9315698 4.5051828 19.357957 0.0000428
## Day_59-Day_4 16.0014942 6.6077750 25.395213 0.0000105
## Day_69-Day_4 13.6824451 4.2887260 23.076164 0.0003310
## Day_14-Day_8 2.6455703 -3.9967922 9.287933 0.9265954
## Day_26-Day_8 10.4276661 3.7500653 17.105267 0.0000806
## Day_35-Day_8 14.2807073 6.8543203 21.707094 0.0000003
## Day_43-Day_8 7.1919964 -0.2343907 14.618383 0.0654429
## Day_59-Day_8 11.2619207 1.8682016 20.655640 0.0071690
## Day_69-Day_8 8.9428717 -0.4508475 18.336591 0.0749470
## Day_26-Day_14 7.7820958 1.1044950 14.459697 0.0102256
## Day_35-Day_14 11.6351370 4.2087500 19.061524 0.0000751
## Day_43-Day_14 4.5464261 -2.8799610 11.972813 0.5729145
## Day_59-Day_14 8.6163504 -0.7773687 18.010070 0.0988058
## Day_69-Day_14 6.2973014 -3.0964178 15.691021 0.4519726
## Day_35-Day_26 3.8530412 -3.6048805 11.310963 0.7632327
## Day_43-Day_26 -3.2356697 -10.6935915 4.222252 0.8887275
## Day_59-Day_26 0.8342546 -8.5844147 10.252924 0.9999947
## Day_69-Day_26 -1.4847944 -10.9034638 7.933875 0.9997319
## Day_43-Day_35 -7.0887110 -15.2239104 1.046488 0.1391600
## Day_59-Day_35 -3.0187866 -12.9823304 6.944757 0.9834121
## Day_69-Day_35 -5.3378357 -15.3013795 4.625708 0.7279263
## Day_59-Day_43 4.0699244 -5.8936194 14.033468 0.9167609
## Day_69-Day_43 1.7508753 -8.2126685 11.714419 0.9994497
## Day_69-Day_59 -2.3190491 -13.8239584 9.185860 0.9986545
# calculate Shannon and InvSimpson alpha diversity and add metadata
ad.df.pre <- data.frame("Description" = colnames(ps1.pre.otu))
ad.df.pre$Shannon <- estimate_richness(ps1.fcs.pre)$Shannon
## Warning in estimate_richness(ps1.fcs.pre): The data you have provided does not have
## any singletons. This is highly suspicious. Results of richness
## estimates (for example) are probably unreliable, or wrong, if you have already
## trimmed low-abundance taxa from the data.
##
## We recommended that you find the un-trimmed data and retry.
ad.df.pre$InvSimpson <- estimate_richness(ps1.fcs.pre)$InvSimpson
## Warning in estimate_richness(ps1.fcs.pre): The data you have provided does not have
## any singletons. This is highly suspicious. Results of richness
## estimates (for example) are probably unreliable, or wrong, if you have already
## trimmed low-abundance taxa from the data.
##
## We recommended that you find the un-trimmed data and retry.
ad.df.pre <- merge(meta(ps1.fcs.pre.r), ad.df.pre, by = "Description", all.x=T)
ad.df.pre <- ad.df.pre
# mixed model: random term and variance structure
lme.pre.pd1 <- lme(Shannon ~ Day_of_study_factor * Treatment,
data = ad.df.pre, method = "REML",
random = ~ 1 | Ear_tag)
lme.pre.pd2 <- lme(InvSimpson ~ Day_of_study_factor * Treatment,
data = ad.df.pre, method = "REML",
random = ~ 1 | Ear_tag)
# residual plots to visually inspect normality assumption
plot(resid(lme.pre.pd1, method= "pearson")~ad.df.pre$Day_of_study_factor); abline(0,0)
hist(resid(lme.pre.pd1, method= "pearson"), breaks=30, col="grey")
qqnorm(lme.pre.pd1, ∼ranef (.))
qqnorm(lme.pre.pd1)
plot(resid(lme.pre.pd2, method= "pearson")~ad.df.pre$Day_of_study_factor); abline(0,0)
hist(resid(lme.pre.pd2, method= "pearson"), breaks=30, col="grey")
qqnorm(lme.pre.pd2, ∼ranef (.))
qqnorm(lme.pre.pd2)
# model output
aov.pd.ls1 <- anova(lme.pre.pd1)
aov.pd.ls1
## numDF denDF F-value p-value
## (Intercept) 1 137 11366.862 <.0001
## Day_of_study_factor 3 137 34.338 <.0001
## Treatment 1 46 4.561 0.0381
## Day_of_study_factor:Treatment 3 137 0.895 0.4454
aov.pd.ls2 <- anova(lme.pre.pd2)
aov.pd.ls2
## numDF denDF F-value p-value
## (Intercept) 1 137 883.6347 <.0001
## Day_of_study_factor 3 137 30.1488 <.0001
## Treatment 1 46 2.8850 0.0962
## Day_of_study_factor:Treatment 3 137 0.2277 0.8770
# calculate Shannon and InvSimpson alpha diversity and add metadata
ad.df.jej <- data.frame("Description" = colnames(ps1.jej.otu))
ad.df.jej$Shannon <- estimate_richness(ps1.jej)$Shannon
## Warning in estimate_richness(ps1.jej): The data you have provided does not have
## any singletons. This is highly suspicious. Results of richness
## estimates (for example) are probably unreliable, or wrong, if you have already
## trimmed low-abundance taxa from the data.
##
## We recommended that you find the un-trimmed data and retry.
ad.df.jej$InvSimpson <- estimate_richness(ps1.jej)$InvSimpson
## Warning in estimate_richness(ps1.jej): The data you have provided does not have
## any singletons. This is highly suspicious. Results of richness
## estimates (for example) are probably unreliable, or wrong, if you have already
## trimmed low-abundance taxa from the data.
##
## We recommended that you find the un-trimmed data and retry.
ad.df.jej <- merge(meta(ps1.jej.r), ad.df.jej, by = "Description", all.x=T)
ad.df.jej <- ad.df.jej
# mixed model: random term and variance structure
lme.jej.pd1 <- lme(Shannon ~ Day_of_study_factor * Treatment,
data = ad.df.jej, method = "REML",
random = ~ 1 | Ear_tag)
lme.jej.pd2 <- lme(InvSimpson ~ Day_of_study_factor * Treatment,
data = ad.df.jej, method = "REML",
random = ~ 1 | Ear_tag)
# residual plots to visually inspect normality assumption
plot(resid(lme.jej.pd1, method= "pearson")~ad.df.jej$Day_of_study_factor); abline(0,0)
hist(resid(lme.jej.pd1, method= "pearson"), breaks=30, col="grey")
qqnorm(lme.jej.pd1, ∼ranef (.))
qqnorm(lme.jej.pd1)
plot(resid(lme.jej.pd2, method= "pearson")~ad.df.jej$Day_of_study_factor); abline(0,0)
hist(resid(lme.jej.pd2, method= "pearson"), breaks=30, col="grey")
qqnorm(lme.jej.pd2, ∼ranef (.))
qqnorm(lme.jej.pd2)
# model output
aov.pd.ls1 <- anova(lme.jej.pd1)
aov.pd.ls1
## numDF denDF F-value p-value
## (Intercept) 1 36 1324.7306 <.0001
## Day_of_study_factor 2 36 27.9231 <.0001
## Treatment 1 36 2.0389 0.1619
## Day_of_study_factor:Treatment 2 36 0.2225 0.8016
aov.pd.ls2 <- anova(lme.jej.pd2)
aov.pd.ls2
## numDF denDF F-value p-value
## (Intercept) 1 36 254.33627 <.0001
## Day_of_study_factor 2 36 11.65518 0.0001
## Treatment 1 36 1.39728 0.2449
## Day_of_study_factor:Treatment 2 36 0.26897 0.7657
# test differences between timepoints - Shannon diversity
res.aov2 <- aov(Shannon ~ Day_of_study_factor, data = ad.df.jej)
summary(res.aov2)
## Df Sum Sq Mean Sq F value Pr(>F)
## Day_of_study_factor 2 10.179 5.089 28.3 2.55e-08 ***
## Residuals 39 7.014 0.180
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(res.aov2, which = "Day_of_study_factor")
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Shannon ~ Day_of_study_factor, data = ad.df.jej)
##
## $Day_of_study_factor
## diff lwr upr p adj
## Day_44-Day_27 -1.09863471 -1.5087814 -0.6884880 0.0000003
## Day_70-Day_27 0.03951321 -0.3318251 0.4108515 0.9636645
## Day_70-Day_44 1.13814792 0.7334602 1.5428356 0.0000001
# test differences between timepoints - InvSimpson diversity
res.aov2 <- aov(InvSimpson ~ Day_of_study_factor, data = ad.df.jej)
summary(res.aov2)
## Df Sum Sq Mean Sq F value Pr(>F)
## Day_of_study_factor 2 160.6 80.3 11.98 8.78e-05 ***
## Residuals 39 261.4 6.7
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(res.aov2, which = "Day_of_study_factor")
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = InvSimpson ~ Day_of_study_factor, data = ad.df.jej)
##
## $Day_of_study_factor
## diff lwr upr p adj
## Day_44-Day_27 -3.621304 -6.124879 -1.117728 0.0030966
## Day_70-Day_27 1.262888 -1.003797 3.529574 0.3728454
## Day_70-Day_44 4.884192 2.413938 7.354446 0.0000651
# calculate Shannon and InvSimpson alpha diversity and add metadata
ad.df.ile <- data.frame("Description" = colnames(ps1.ile.otu))
ad.df.ile$Shannon <- estimate_richness(ps1.ile)$Shannon
## Warning in estimate_richness(ps1.ile): The data you have provided does not have
## any singletons. This is highly suspicious. Results of richness
## estimates (for example) are probably unreliable, or wrong, if you have already
## trimmed low-abundance taxa from the data.
##
## We recommended that you find the un-trimmed data and retry.
ad.df.ile$InvSimpson <- estimate_richness(ps1.ile)$InvSimpson
## Warning in estimate_richness(ps1.ile): The data you have provided does not have
## any singletons. This is highly suspicious. Results of richness
## estimates (for example) are probably unreliable, or wrong, if you have already
## trimmed low-abundance taxa from the data.
##
## We recommended that you find the un-trimmed data and retry.
ad.df.ile <- merge(meta(ps1.ile.r), ad.df.ile, by = "Description", all.x=T)
ad.df.ile <- ad.df.ile
# mixed model: random term and variance structure
lme.ile.pd1 <- lme(Shannon ~ Day_of_study_factor * Treatment,
data = ad.df.ile, method = "REML",
random = ~ 1 | Ear_tag)
lme.ile.pd2 <- lme(InvSimpson ~ Day_of_study_factor * Treatment,
data = ad.df.ile, method = "REML",
random = ~ 1 | Ear_tag)
# residual plots to visually inspect normality assumption
plot(resid(lme.ile.pd1, method= "pearson")~ad.df.ile$Day_of_study_factor); abline(0,0)
hist(resid(lme.ile.pd1, method= "pearson"), breaks=30, col="grey")
qqnorm(lme.ile.pd1, ∼ranef (.))
qqnorm(lme.ile.pd1)
plot(resid(lme.ile.pd2, method= "pearson")~ad.df.ile$Day_of_study_factor); abline(0,0)
hist(resid(lme.ile.pd2, method= "pearson"), breaks=30, col="grey")
qqnorm(lme.ile.pd2, ∼ranef (.))
qqnorm(lme.ile.pd2)
# model output
aov.pd.ls1 <- anova(lme.ile.pd1)
aov.pd.ls1
## numDF denDF F-value p-value
## (Intercept) 1 38 1237.8926 <.0001
## Day_of_study_factor 2 38 30.7933 <.0001
## Treatment 1 38 0.9861 0.3270
## Day_of_study_factor:Treatment 2 38 0.7677 0.4711
aov.pd.ls2 <- anova(lme.ile.pd2)
aov.pd.ls2
## numDF denDF F-value p-value
## (Intercept) 1 38 451.2633 <.0001
## Day_of_study_factor 2 38 52.7914 <.0001
## Treatment 1 38 3.8176 0.0581
## Day_of_study_factor:Treatment 2 38 2.3983 0.1045
# test differences between timepoints - Shannon diversity
res.aov2 <- aov(Shannon ~ Day_of_study_factor, data = ad.df.ile)
summary(res.aov2)
## Df Sum Sq Mean Sq F value Pr(>F)
## Day_of_study_factor 2 9.092 4.546 31.16 5.91e-09 ***
## Residuals 41 5.982 0.146
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(res.aov2, which = "Day_of_study_factor")
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Shannon ~ Day_of_study_factor, data = ad.df.ile)
##
## $Day_of_study_factor
## diff lwr upr p adj
## Day_44-Day_27 -1.0211376 -1.3549503 -0.6873249 0.0000000
## Day_70-Day_27 -0.8605586 -1.2125154 -0.5086018 0.0000015
## Day_70-Day_44 0.1605790 -0.1862337 0.5073917 0.5038774
# test differences between timepoints - InvSimpson diversity
res.aov2 <- aov(InvSimpson ~ Day_of_study_factor, data = ad.df.ile)
summary(res.aov2)
## Df Sum Sq Mean Sq F value Pr(>F)
## Day_of_study_factor 2 272.1 136.07 46.43 2.92e-11 ***
## Residuals 41 120.2 2.93
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(res.aov2, which = "Day_of_study_factor")
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = InvSimpson ~ Day_of_study_factor, data = ad.df.ile)
##
## $Day_of_study_factor
## diff lwr upr p adj
## Day_44-Day_27 -5.376613 -6.872672 -3.880554 0.00000
## Day_70-Day_27 -5.072451 -6.649827 -3.495075 0.00000
## Day_70-Day_44 0.304162 -1.250160 1.858484 0.88303
# calculate Shannon and InvSimpson alpha diversity and add metadata
ad.df.cae <- data.frame("Description" = colnames(ps1.cae.otu))
ad.df.cae$Shannon <- estimate_richness(ps1.cae)$Shannon
## Warning in estimate_richness(ps1.cae): The data you have provided does not have
## any singletons. This is highly suspicious. Results of richness
## estimates (for example) are probably unreliable, or wrong, if you have already
## trimmed low-abundance taxa from the data.
##
## We recommended that you find the un-trimmed data and retry.
ad.df.cae$InvSimpson <- estimate_richness(ps1.cae)$InvSimpson
## Warning in estimate_richness(ps1.cae): The data you have provided does not have
## any singletons. This is highly suspicious. Results of richness
## estimates (for example) are probably unreliable, or wrong, if you have already
## trimmed low-abundance taxa from the data.
##
## We recommended that you find the un-trimmed data and retry.
ad.df.cae <- merge(meta(ps1.cae.r), ad.df.cae, by = "Description", all.x=T)
ad.df.cae <- ad.df.cae
# mixed model: random term and variance structure
lme.cae.pd1 <- lme(Shannon ~ Day_of_study_factor * Treatment,
data = ad.df.cae, method = "REML",
random = ~ 1 | Ear_tag)
lme.cae.pd2 <- lme(InvSimpson ~ Day_of_study_factor * Treatment,
data = ad.df.cae, method = "REML",
random = ~ 1 | Ear_tag)
# residual plots to visually inspect normality assumption
plot(resid(lme.cae.pd1, method= "pearson")~ad.df.cae$Day_of_study_factor); abline(0,0)
hist(resid(lme.cae.pd1, method= "pearson"), breaks=30, col="grey")
qqnorm(lme.cae.pd1, ∼ranef (.))
qqnorm(lme.cae.pd1)
plot(resid(lme.cae.pd2, method= "pearson")~ad.df.cae$Day_of_study_factor); abline(0,0)
hist(resid(lme.cae.pd2, method= "pearson"), breaks=30, col="grey")
qqnorm(lme.cae.pd2, ∼ranef (.))
qqnorm(lme.cae.pd2)
# model output
aov.pd.ls1 <- anova(lme.cae.pd1)
aov.pd.ls1
## numDF denDF F-value p-value
## (Intercept) 1 42 6765.088 <.0001
## Day_of_study_factor 2 42 19.930 <.0001
## Treatment 1 42 0.091 0.7648
## Day_of_study_factor:Treatment 2 42 1.534 0.2275
aov.pd.ls2 <- anova(lme.cae.pd2)
aov.pd.ls2
## numDF denDF F-value p-value
## (Intercept) 1 42 412.5819 <.0001
## Day_of_study_factor 2 42 21.9302 <.0001
## Treatment 1 42 0.1404 0.7098
## Day_of_study_factor:Treatment 2 42 0.8890 0.4187
# test differences between timepoints - Shannon diversity
res.aov2 <- aov(Shannon ~ Day_of_study_factor, data = ad.df.cae)
summary(res.aov2)
## Df Sum Sq Mean Sq F value Pr(>F)
## Day_of_study_factor 2 4.122 2.0611 19.86 6.57e-07 ***
## Residuals 45 4.670 0.1038
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(res.aov2, which = "Day_of_study_factor")
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Shannon ~ Day_of_study_factor, data = ad.df.cae)
##
## $Day_of_study_factor
## diff lwr upr p adj
## Day_44-Day_27 -0.7176074 -0.99364948 -0.44156527 0.0000003
## Day_70-Day_27 -0.3741400 -0.65018207 -0.09809786 0.0055161
## Day_70-Day_44 0.3434674 0.06742531 0.61950952 0.0114964
# test differences between timepoints - InvSimpson diversity
res.aov2 <- aov(InvSimpson ~ Day_of_study_factor, data = ad.df.cae)
summary(res.aov2)
## Df Sum Sq Mean Sq F value Pr(>F)
## Day_of_study_factor 2 3175 1587.4 22.47 1.71e-07 ***
## Residuals 45 3179 70.6
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(res.aov2, which = "Day_of_study_factor")
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = InvSimpson ~ Day_of_study_factor, data = ad.df.cae)
##
## $Day_of_study_factor
## diff lwr upr p adj
## Day_44-Day_27 -19.865000 -27.067086 -12.662915 0.0000001
## Day_70-Day_27 -11.226777 -18.428862 -4.024691 0.0013187
## Day_70-Day_44 8.638223 1.436138 15.840309 0.0152966
ps1.glucan <- readRDS("./phyobjects/ps1.glucan.rds")
print(ps1.glucan)
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 2969 taxa and 421 samples ]
## sample_data() Sample Data: [ 421 samples by 115 sample variables ]
## tax_table() Taxonomy Table: [ 2969 taxa by 6 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 2969 tips and 2968 internal nodes ]
ps1.glucan@sam_data$Day_of_study <- factor(ps1.glucan@sam_data$Day_of_study, levels = c("Day_4", "Day_8", "Day_14", "Day_26", "Day_27", "Day_35", "Day_43", "Day_44", "Day_59", "Day_69", "Day_70"))
ps1.glucan@sam_data$Pre_or_post_weaning <- factor(ps1.glucan@sam_data$Pre_or_post_weaning, levels = c("Pre-weaning", "Post-weaning"))
ps1.glucan@sam_data$Origin_Day <- factor(ps1.glucan@sam_data$Origin_Day, levels = c("faeces_Day_4", "faeces_Day_8", "faeces_Day_14", "faeces_Day_26", "faeces_Day_35", "faeces_Day_43", "faeces_Day_59", "faeces_Day_69", "jejunum_digesta_Day_27", "jejunum_digesta_Day_44", "jejunum_digesta_Day_70", "ileum_digesta_Day_27", "ileum_digesta_Day_44", "ileum_digesta_Day_70", "caecum_digesta_Day_27", "caecum_digesta_Day_44", "caecum_digesta_Day_70"))
ps1.subset <- subset_samples(ps1.glucan, Origin %in% c("faeces", "jejunum_digesta", "ileum_digesta", "caecum_digesta"))
ps1.subset <- prune_taxa(taxa_sums(otu_table(ps1.subset))>0, ps1.subset)
taxic <- as.data.frame(ps1.subset@tax_table)
taxic$OTU <- rownames(taxic)
tax_table(ps1.subset) <- tax_table(as.matrix(taxic))
ps1.subset@phy_tree <- NULL
ps1.fam <- microbiome::aggregate_top_taxa(ps1.subset, "Family", top = 25)
## Warning: 'microbiome::aggregate_top_taxa' is deprecated.
## Use 'aggregate_rare' instead.
## See help("Deprecated") and help("The microbiome::aggregate_top_taxa function is deprecated.-deprecated").
ps1.fam.r <- microbiome::transform(ps1.fam, "compositional")
getPalette = colorRampPalette(brewer.pal(12, "Paired"))
guide_italics <- guides(fill = guide_legend(label.theme = element_text(size = 15,
face = "italic", colour = "Black", angle = 0)))
Barplot family level
pFam1 <- plot_composition(ps1.fam.r, x.label = "Treatment",
otu.sort = "abundance",
group_by = "Origin_Day", average_by = "Origin_Day") +
scale_fill_manual(values = getPalette(26)) + theme_bw() +
theme(axis.text.x = element_text(angle = 90),
legend.title = element_text(size=18),
legend.position = "right") +
ggtitle("Relative abundance") + guide_italics
pFam1
pdf(file = "./figures/Barplot_Family.pdf", height = 10, width = 15)
pFam1
dev.off() #The final figure can be found under panel B in Figure 3 of the manuscript
## png
## 2
Barplot on Phylum level - prepare data
ps1.subset <- subset_samples(ps1.glucan, Origin %in% c("faeces", "jejunum_digesta", "ileum_digesta", "caecum_digesta"))
ps1.subset <- prune_taxa(taxa_sums(otu_table(ps1.subset))>0, ps1.subset)
taxic <- as.data.frame(ps1.subset@tax_table)
taxic$OTU <- rownames(taxic)
tax_table(ps1.subset) <- tax_table(as.matrix(taxic))
ps1.subset@phy_tree <- NULL
ps1.phyl <- microbiome::aggregate_top_taxa(ps1.subset, "Phylum", top = 10)
## Warning: 'microbiome::aggregate_top_taxa' is deprecated.
## Use 'aggregate_rare' instead.
## See help("Deprecated") and help("The microbiome::aggregate_top_taxa function is deprecated.-deprecated").
ps1.phyl.r <- microbiome::transform(ps1.phyl, "compositional")
Barplot Phylum level
pphyl1 <- plot_composition(ps1.phyl.r, sample.sort = "neatmap", x.label = "Treatment", otu.sort = "abundance", group_by = "Origin_Day", average_by = "Origin_Day") +
scale_fill_manual(values = getPalette(11)) + theme_bw() +
theme(axis.text.x = element_text(angle = 90),
legend.title = element_text(size=18),
legend.position = "right") +
ggtitle("Relative abundance") + guide_italics
pphyl1
pdf(file = "./figures/Barplot_Phylum.pdf", height = 10, width = 12)
pphyl1
dev.off() #The final figure can be found under panel A in Figure 3 of the manuscript
## png
## 2
Barplot with Archaea - prepare data
ps1.glucan.r <- microbiome::transform(ps1.glucan, "compositional")
ps1.archaea <- subset_taxa(ps1.glucan.r, Phylum == "Euryarchaeota")
ps1.archaea
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 22 taxa and 421 samples ]
## sample_data() Sample Data: [ 421 samples by 115 sample variables ]
## tax_table() Taxonomy Table: [ 22 taxa by 6 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 22 tips and 21 internal nodes ]
ps1.subset <- subset_samples(ps1.archaea, Origin %in% c("faeces", "jejunum_digesta", "ileum_digesta", "caecum_digesta"))
ps1.subset <- prune_taxa(taxa_sums(otu_table(ps1.subset))>0, ps1.subset)
taxic <- as.data.frame(ps1.subset@tax_table)
taxic$OTU <- rownames(taxic)
tax_table(ps1.subset) <- tax_table(as.matrix(taxic))
ps1.subset@phy_tree <- NULL
ps1.phyl <- microbiome::aggregate_top_taxa(ps1.subset, "Genus", top = 10)
## Warning: 'microbiome::aggregate_top_taxa' is deprecated.
## Use 'aggregate_rare' instead.
## See help("Deprecated") and help("The microbiome::aggregate_top_taxa function is deprecated.-deprecated").
Barplot with Archaea
pphyl1 <- plot_composition(ps1.phyl, x.label = "Treatment", otu.sort = "abundance", group_by = "Origin_Day", average_by = "Origin_Day") +
scale_fill_manual(values = getPalette(5)) + theme_bw() +
theme(axis.text.x = element_text(angle = 90),
legend.title = element_text(size=18),
legend.position = "right") +
ggtitle("Relative abundance") + guide_italics
pphyl1
pdf(file = "./figures/Barplot_Genus_archaea.pdf", height = 10, width = 13)
pphyl1
dev.off() #The final figure can be found as Supplementary Figure S5 of the manuscript
## png
## 2
data <- pphyl1$data #data object generated to investigate abundances of Archaea
Input data Principal Coordinates Analysis plot
ps1 <- readRDS("./phyobjects/ps1.glucan.rds")
ps1@sam_data$Day_of_study_factor <- factor(ps1@sam_data$Day_of_study_factor, levels = c("Day_4", "Day_8", "Day_14", "Day_26", "Day_27", "Day_35", "Day_43", "Day_44", "Day_59", "Day_69", "Day_70"))
ps1@sam_data$Pre_or_post_weaning <- factor(ps1@sam_data$Pre_or_post_weaning, levels = c("Pre_weaning", "Post_weaning"))
ps1 <- subset_samples(ps1, Origin %in% c("faeces"))
ps1 <- prune_taxa(taxa_sums(ps1) > 0, ps1)
ps1 <- microbiome::transform(ps1, "compositional")
Ordination plot based on Weighted Unifrac
set.seed(49275)
ordu.wt.uni = ordinate(ps1, "PCoA", "unifrac", weighted=TRUE)
wt.unifrac <- plot_ordination(ps1, ordu.wt.uni, color="Day_of_study_factor", shape="Treatment")
wt.unifrac <- wt.unifrac + scale_color_viridis(discrete = TRUE, option = "C")+ scale_fill_viridis(discrete = TRUE) +
ggtitle("Ordination plot - PCoA, Weighted UniFrac faecal samples") + geom_point(size = 3) + scale_shape_manual(values=c(15,17))
wt.unifrac <- wt.unifrac +
stat_ellipse(type = "norm", linetype = 5) +
theme_bw()
print(wt.unifrac)
pdf(file = "./figures/PCoA_wt.unifrac.pdf", height = 10, width = 15)
wt.unifrac
dev.off() #The final figure can be found as Figure 4 of the manuscript
## png
## 2
ps1 <- readRDS("./phyobjects/ps1.glucan.rds")
ps1@sam_data$Day_of_study_factor <- factor(ps1@sam_data$Day_of_study_factor, levels = c("Day_4", "Day_8", "Day_14", "Day_26", "Day_27", "Day_35", "Day_43", "Day_44", "Day_59", "Day_69", "Day_70"))
ps1@sam_data$Pre_or_post_weaning <- factor(ps1@sam_data$Pre_or_post_weaning, levels = c("Pre_weaning", "Post_weaning"))
# subsetting
ps1.fcs <- subset_samples(ps1, Origin %in% c("faeces"))
ps1.fcs.pre <- subset_samples(ps1.fcs, Pre_or_post_weaning %in% c("Pre_weaning"))
ps1.fcs.r <- microbiome::transform(ps1.fcs, "compositional")
ps1.fcs.pre.r <- microbiome::transform(ps1.fcs.pre, "compositional")
# Conversions of data for Adonis tests
fcs.otu <- abundances(ps1.fcs.r)
fcs.meta <- meta(ps1.fcs.r)
fcs.otu.pre <- abundances(ps1.fcs.pre.r)
fcs.meta.pre <- meta(ps1.fcs.pre.r)
Visualize the population density and highlight sample groups:
p <- plot_landscape(ps1.fcs.r, method = "NMDS", distance = "bray", col = "Day_of_study_factor", size =3)
print(p)
p <- plot_landscape(ps1.fcs.r, method = "NMDS", distance = "bray", col = "Treatment", size =3)
print(p)
p <- plot_landscape(ps1.fcs.pre.r, method = "NMDS", distance = "bray", col = "Day_of_study_factor", size =3)
print(p)
p <- plot_landscape(ps1.fcs.pre.r, method = "NMDS", distance = "bray", col = "Treatment", size =3)
print(p)
PERMANOVA significance test for group-level differences
set.seed(343)
permanova <- adonis(t(fcs.otu) ~ Treatment + Day_of_study_factor,
data = fcs.meta, permutations=9999, method = "bray")
print(as.data.frame(permanova$aov.tab)) #no differences in beta-diversity between treatment groups
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Treatment 1 0.3128949 0.3128949 1.329623 0.003094676 0.1493
## Day_of_study_factor 7 35.3739595 5.0534228 21.474141 0.349864949 0.0001
## Residuals 278 65.4206145 0.2353260 NA 0.647040375 NA
## Total 286 101.1074689 NA NA 1.000000000 NA
set.seed(456)
permanova.pre <- adonis(t(fcs.otu.pre) ~ Treatment + Day_of_study_factor,
data = fcs.meta.pre, permutations=9999, method = "bray")
print(as.data.frame(permanova.pre$aov.tab)) #differences in beta-diversity between treatment groups
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Treatment 1 0.4374209 0.4374209 1.711205 0.006996515 0.0411
## Day_of_study_factor 3 14.5368011 4.8456004 18.956153 0.232515071 0.0001
## Residuals 186 47.5456012 0.2556215 NA 0.760488414 NA
## Total 190 62.5198232 NA NA 1.000000000 NA
# all faecal samples - dispersion between treatment groups
dist <- vegdist(t(fcs.otu), method="bray")
anova(betadisper(dist, fcs.meta$Treatment)) #no statistical significant differences in dispersion between treatment groups
## Analysis of Variance Table
##
## Response: Distances
## Df Sum Sq Mean Sq F value Pr(>F)
## Groups 1 0.00054 0.00054152 0.2655 0.6068
## Residuals 285 0.58138 0.00203994
dispersion <- betadisper(dist, group= fcs.meta$Treatment)
plot(dispersion, hull=FALSE, ellipse = TRUE)
# dispersion pre- vs post-weaning
anova(betadisper(dist, fcs.meta$Pre_or_post_weaning)) #statistical significant differences in dispersion between pre- and post-weaning samples
## Analysis of Variance Table
##
## Response: Distances
## Df Sum Sq Mean Sq F value Pr(>F)
## Groups 1 0.81987 0.81987 199.01 < 2.2e-16 ***
## Residuals 285 1.17414 0.00412
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
dispersion_weaning <- betadisper(dist, group= fcs.meta$Pre_or_post_weaning)
plot(dispersion_weaning, hull=FALSE, ellipse = TRUE)
# dispersion in each time-point
anova(betadisper(dist, fcs.meta$Day_of_study_factor)) #statistical significant differences in dispersion between time points
## Analysis of Variance Table
##
## Response: Distances
## Df Sum Sq Mean Sq F value Pr(>F)
## Groups 7 0.42241 0.060344 9.8852 5.101e-11 ***
## Residuals 279 1.70315 0.006104
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
dispersion_day <- betadisper(dist, group = fcs.meta$Day_of_study_factor)
plot(dispersion_day, hull=FALSE, ellipse = TRUE)
# pre-weaning faecal samples - dispersion between treatment groups
dist.pre <- vegdist(t(fcs.otu.pre), method="bray")
anova(betadisper(dist.pre, fcs.meta.pre$Treatment)) #no statistical significant differences in dispersion between groups in pre-weaning faecal samples
## Analysis of Variance Table
##
## Response: Distances
## Df Sum Sq Mean Sq F value Pr(>F)
## Groups 1 0.00087 0.00086817 0.2949 0.5878
## Residuals 189 0.55648 0.00294433
dispersion.pre <- betadisper(dist.pre, group= fcs.meta.pre$Treatment)
plot(dispersion.pre, hull=FALSE, ellipse = TRUE)
# pre-weaning faecal samples - dispersion in each time-point
anova(betadisper(dist.pre, fcs.meta.pre$Day_of_study_factor)) #no statistical significant differences in dispersion between timepoints in pre-weaning faecal samples
## Analysis of Variance Table
##
## Response: Distances
## Df Sum Sq Mean Sq F value Pr(>F)
## Groups 3 0.03017 0.010056 1.9034 0.1305
## Residuals 187 0.98792 0.005283
dispersion.pre_day <- betadisper(dist.pre, group = fcs.meta.pre$Day_of_study_factor)
plot(dispersion.pre_day, hull=FALSE, ellipse = TRUE)
ps1.glucan <- readRDS("./phyobjects/ps1.glucan.rds")
ps1.glucan@sam_data$Day_of_study_factor <- factor(ps1.glucan@sam_data$Day_of_study_factor, levels = c("Day_4", "Day_8", "Day_14", "Day_26", "Day_27", "Day_35", "Day_43", "Day_44", "Day_59", "Day_69", "Day_70"))
ps1.glucan.g <- tax_glom(ps1.glucan, "Genus")
ps1.glucan.g.r <- microbiome::transform(ps1.glucan.g, "compositional")
ps1.glucan.g.r <- subset_samples(ps1.glucan.g.r, Origin %in% c("faeces"))
ps1.glucan.g.r <- subset_samples(ps1.glucan.g.r, Pre_or_post_weaning %in% c("Pre_weaning"))
ps1.glucan.g.r <- filter_taxa(ps1.glucan.g.r, function(x) sum(x > 0.001) > (0.50*length(x)), TRUE)
ps1.glucan.p <- prune_taxa(taxa_sums(otu_table(ps1.glucan.g.r)) > 0, ps1.glucan.g.r)
# tax table with OTU column and best_hit
glucan.tax <- as.data.frame(tax_table(ps1.glucan.g.r))
glucan.tax$OTU <- rownames(glucan.tax)
tax_table(ps1.glucan.g.r) <- tax_table(as.matrix(glucan.tax))
ps1.glucan.g.bh <- format_to_besthit(ps1.glucan.g.r)
glucan.tax.bh <- as.data.frame(tax_table(ps1.glucan.g.bh))
colnames(glucan.tax.bh)[7] <- "OTU"
glucan.g.met <- meta(ps1.glucan.p)
glucan.g1 <- glucan.g.met[,c("Description","Ear_tag", "department_pre_weaning",
"Treatment","Day_of_study_factor")]
# OTU table as dataframe:
glucan.g2 <- as.data.frame(t(otu_table(ps1.glucan.p)))
glucan.g2$Description <- rownames(glucan.g2)
# merge files
glucan.g3 <- merge(glucan.g1, glucan.g2, by = "Description")
colnames(glucan.g3)[6:36] <- paste("k__", sep = "", colnames(glucan.g3)[6:36])
Abundance testing on pre-weaning faecal samples
# Results, warnings and messages have been inactivated for this markdown, as otherwise it will use too much space in the HTML output file.
tc.treatment <- taxa.compare(taxtab = glucan.g3, propmed.rel = "gamlss",
transform = "none", comvar = "Treatment",
adjustvar = c("Day_of_study_factor", "department_pre_weaning"),
personid = "Ear_tag", longitudinal = "yes",
p.adjust.method = "fdr")
tc.glucan05 <- subset(tc.treatment, pval.adjust.TreatmentY_glucan < .05)
tc.glucan05$id <- droplevels(as.factor(tc.glucan05$id))
glucan3.m <- reshape2::melt(glucan.g3)
glucan3.max <- ddply(glucan3.m, .(variable), summarise, max = max(value))
glucan3.max1 <- glucan3.max
glucan3.max1$variable <- droplevels(glucan3.max1$variable)
# subset
glucan.g4 <- subset(glucan3.m, variable %in% tc.glucan05$id &
variable %in% glucan3.max1$variable)
# create vector of genus names (OTU codes) to include in plots
select.gen.glucan <- levels(droplevels(glucan.g4$variable)) # 27 genera
select.gen.glucan <- sub("k__", "", select.gen.glucan) # remove "k__"
select.gen.glucan
tc.glucan.sub <- subset(tc.glucan05, id %in% glucan3.max1$variable)
# remove "k__" in id names
tc.glucan.sub$id <- as.factor(sub("k__", "", tc.glucan.sub$id))
# show tax names of output genera
glucan.g4.tax <- subset(glucan.tax.bh, OTU %in% select.gen.glucan) #significant genera
# create vectors of significant genera from comparisons
select.gen.glucan2 <- unique(c(as.character(select.gen.glucan)))
# filter for genera from first output series of GAMLSS tests (select.gen.glucan2).
# object 1: metadata
glucan.tr.met <- meta(ps1.glucan.g.r)
glucan.tr1 <- glucan.tr.met[,c("Description","Ear_tag", "department_pre_weaning",
"Treatment","Day_of_study_factor")]
# object 2: OTU table
glucan.tr2 <- as.data.frame(t(otu_table(ps1.glucan.g.r)))
glucan.tr2$Description <- rownames(glucan.tr2)
# melt glucan.tr2
glucan.tr2.m <- reshape2::melt(glucan.tr2)
## Using Description as id variables
colnames(glucan.tr2.m) <- c("Description", "OTU", "abund")
# merge glucan.tr2, glucan.tax and glucan.tr1
glucan.tr3a <- base::merge(glucan.tr2.m, glucan.tax, by = "OTU")
glucan.tr3 <- base::merge(glucan.tr1, glucan.tr3a, by = "Description")
# subset to signif genera and max(abund) > 0.1
glucan.tr3.max <- ddply(glucan.tr3, .(OTU), summarise, max = max(abund))
glucan.tr3.max <- subset(glucan.tr3.max, max > .1)
glucan.tr3.max$OTU <- droplevels(glucan.tr3.max$OTU)
glucan.tr3.s <- subset(glucan.tr3, OTU %in% select.gen.glucan2 & OTU %in%
glucan.tr3.max$OTU)
glucan.tr3.s$OTU <- droplevels(glucan.tr3.s$OTU)
theme4 <- theme_classic() +
theme(panel.grid.major = element_line(colour = "grey80"),
panel.spacing = unit(.5,"lines"),
panel.border = element_rect(color = "black", fill = NA, size = .5),
strip.background = element_blank(),
strip.placement = "outside",
text = element_text(size=15),
axis.text.x = element_text(hjust = .5, vjust = .5),
plot.title = element_text(size = 12))
labs_abd <- as_labeller(c(
"faeces" = "Faeces", "ileum_digesta" = "Ileum digesta", "jejunum_digesta" = " Jejunum digesta"))
# for-loop
sig.plot <- list()
for(i in 1:nlevels(glucan.tr3.s$OTU)){
a = levels(glucan.tr3.s$OTU)[i]
B = glucan.tr3.s[glucan.tr3.s$OTU == a,]
p = ggplot(B, aes(x = Day_of_study_factor, y = abund, colour = Treatment, alpha = 0.95)) +
geom_boxplot(outlier.size = 0) +
geom_point(size = 2, position = position_jitterdodge()) +
labs(x="time (d)", y="rel. abundance") +
scale_y_continuous(limits = c(0,1), n.breaks = 6) +
ggtitle(paste(B$Order,B$Family,B$Genus,B$OTU)) + theme4
sig.plot[[i]] = p
}
# print plots
pdf("./figures/Differential_abundant_genera.pdf")
for (i in 1:nlevels(glucan.tr3.s$OTU)) {
print(sig.plot[[i]])
}
dev.off() #Resulting figures can be found under Figure 5 of the manuscript
## png
## 2
# only faeces pre-weaning
fcs.pre <- readRDS("./phyobjects/ps1.faeces.rds")
fcs.pre <- subset_samples(fcs.pre, Pre_or_post_weaning == "Pre_weaning")
fcs.pre <- prune_taxa(taxa_sums(otu_table(fcs.pre)) > 0, fcs.pre)
# only faeces post-weaning
fcs.post <- readRDS("./phyobjects/ps1.faeces.rds")
fcs.post <- subset_samples(fcs.post, Pre_or_post_weaning == "Post_weaning")
fcs.post <- prune_taxa(taxa_sums(otu_table(fcs.post)) > 0, fcs.post)
print(fcs.pre)
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 1640 taxa and 191 samples ]
## sample_data() Sample Data: [ 191 samples by 115 sample variables ]
## tax_table() Taxonomy Table: [ 1640 taxa by 6 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 1640 tips and 1639 internal nodes ]
print(fcs.post)
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 1159 taxa and 96 samples ]
## sample_data() Sample Data: [ 96 samples by 115 sample variables ]
## tax_table() Taxonomy Table: [ 1159 taxa by 6 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 1159 tips and 1158 internal nodes ]
# transform to relative abundance
fcs.pre.r <- microbiome::transform(fcs.pre, "compositional")
fcs.post.r <- microbiome::transform(fcs.post, "compositional")
rank_names(fcs.pre.r)
## [1] "Domain" "Phylum" "Class" "Order" "Family" "Genus"
pre.gen.r <- tax_glom(fcs.pre.r, taxrank = rank_names(fcs.pre.r) [6]) #Aggregate to genus level
# Add Family level names
taxa_names(pre.gen.r) <- interaction(rownames(pre.gen.r@tax_table[,c("Family")]),pre.gen.r@tax_table[,c("Family")])
# Add Genus level names
taxa_names(pre.gen.r) <- interaction(rownames(pre.gen.r@tax_table[,c("Genus")]),pre.gen.r@tax_table[,c("Genus")])
# Genus level
df.pre.gen.r <- as.data.frame(t(abundances(pre.gen.r)))
df.pre.gen.r$Description <- rownames(df.pre.gen.r)
dfm.pre.gen.r <- merge(meta(pre.gen.r)[c("Description","Treatment", "Day_of_study")], df.pre.gen.r, by="Description")
response<-dfm.pre.gen.r[,4:length(dfm.pre.gen.r)]
time<-as.factor(dfm.pre.gen.r[,3])
treatment<-as.factor(dfm.pre.gen.r[,2])
# Set up objects and matrices
y <- deparse(substitute(response))
x <- deparse(substitute(treatment))
z <- deparse(substitute(time))
# Create formulas and design matrices
fla <- as.formula(paste("~", x, "+", z))
mf <- model.frame(fla, response, na.action = na.pass)
fla.zx <- as.formula(paste("~", z, ":", x))
fla.z <- as.formula(paste("~", z))
X = model.matrix(fla.zx, mf)[, -c(seq_len(nlevels(time) + 1))]
Z = model.matrix(fla.z, mf)[, -1]
# Calculate Weighted UniFrac distance matrix
dm.pre.gen.r.wUF <- UniFrac(pre.gen.r, weighted = T)
# Calculation of UniFrac distance-based RDA
dbRDA.pre <- capscale(dm.pre.gen.r.wUF ~ X + Condition(Z), data = dfm.pre.gen.r[,2:3], comm = response)
# Conversion of dbRDA to PRC compatible format
dbRDA.pre$terminfo$xlev = list(levels(time), levels(treatment))
names(dbRDA.pre$terminfo$xlev) = c(paste(z), paste(x))
dbRDA.pre$call <- match.call()
class(dbRDA.pre) <- c("prc", class(dbRDA.pre))
sum_dbprc <- summary(dbRDA.pre)
plot(dbRDA.pre, main = "dbPRC faecal samples w_unifrac genus level", select = abs(sum_dbprc$sp) > 0.2)
pdf(file = "./figures/prc_pre-weaning.pdf", height = 5, width = 8)
plot(dbRDA.pre, main = "dbPRC faecal samples w_unifrac genus level", select = abs(sum_dbprc$sp) > 0.2)
dev.off() #The final figure can be found under panel A in Supplementary Figure S4 of the manuscript
## png
## 2
post.gen.r <- tax_glom(fcs.post.r, taxrank = rank_names(fcs.post.r) [6])
rank_names(post.gen.r)
## [1] "Domain" "Phylum" "Class" "Order" "Family" "Genus"
# Add Family level names
taxa_names(post.gen.r) <- interaction(rownames(post.gen.r@tax_table[,c("Family")]),post.gen.r@tax_table[,c("Family")])
# Add Genus level names
taxa_names(post.gen.r) <- interaction(rownames(post.gen.r@tax_table[,c("Genus")]),post.gen.r@tax_table[,c("Genus")])
df.post.gen.r <- as.data.frame(t(abundances(post.gen.r)))
df.post.gen.r$Description <- rownames(df.post.gen.r)
dfm.post.gen.r <- merge(meta(post.gen.r)[c("Description","Treatment", "Day_of_study")], df.post.gen.r, by="Description")
response<-dfm.post.gen.r[,4:length(dfm.post.gen.r)]
time<-as.factor(dfm.post.gen.r[,3])
treatment<-as.factor(dfm.post.gen.r[,2])
# Set up objects and matrices
y <- deparse(substitute(response))
x <- deparse(substitute(treatment))
z <- deparse(substitute(time))
# Create formulas and design matrices
fla <- as.formula(paste("~", x, "+", z))
mf <- model.frame(fla, response, na.action = na.pass)
fla.zx <- as.formula(paste("~", z, ":", x))
fla.z <- as.formula(paste("~", z))
X = model.matrix(fla.zx, mf)[, -c(seq_len(nlevels(time) + 1))]
Z = model.matrix(fla.z, mf)[, -1]
# Calculate Weighted UniFrac distance matrix
dm.post.gen.r.wUF <- UniFrac(post.gen.r, weighted = T)
# Calculation of UniFrac distance based RDA
dbRDA.CFgen <- capscale(dm.post.gen.r.wUF ~ X + Condition(Z), data = dfm.post.gen.r[,2:3], comm = response)
# Conversion of dbRDA to PRC compatible format
dbRDA.CFgen$terminfo$xlev = list(levels(time), levels(treatment))
names(dbRDA.CFgen$terminfo$xlev) = c(paste(z), paste(x))
dbRDA.CFgen$call <- match.call()
class(dbRDA.CFgen) <- c("prc", class(dbRDA.CFgen))
sum_dbprc <- summary(dbRDA.CFgen)
plot(dbRDA.CFgen, main = "dbPRC faecal samples w_unifrac genus level", select = abs(sum_dbprc$sp) > 0.2)
pdf(file = "./figures/prc_post-weaning.pdf", height = 5, width = 8)
plot(dbRDA.CFgen, main = "dbPRC faecal samples w_unifrac genus level", select = abs(sum_dbprc$sp) > 0.2)
dev.off() #The final figure can be found under panel B in Supplementary Figure S4 of the manuscript
## png
## 2
ps1 <- readRDS("./phyobjects/ps1.glucan.rds")
ps1@sam_data$Day_of_study_factor <- factor(ps1@sam_data$Day_of_study_factor, levels = c("Day_4", "Day_8", "Day_14", "Day_26", "Day_27", "Day_35", "Day_43", "Day_44", "Day_59", "Day_69", "Day_70"))
ps1@sam_data$Pre_or_post_weaning <- factor(ps1@sam_data$Pre_or_post_weaning, levels = c("Pre_weaning", "Post_weaning"))
ps1.fcs <- subset_samples(ps1, Origin %in% c("faeces"))
ps1.fcs.fam <- microbiome::aggregate_taxa(ps1.fcs, "Family")
# remove taxa with 0 abundance
ps1.fcs.fam <- prune_taxa(taxa_sums(otu_table(ps1.fcs.fam)) > 0, ps1.fcs.fam)
# add FAM column
ps1.fcs.fam.tax <- as.data.frame(ps1.fcs.fam@tax_table)
ps1.fcs.fam.tax$FAM <- rownames(ps1.fcs.fam.tax)
tax_table(ps1.fcs.fam) <- tax_table(as.matrix(ps1.fcs.fam.tax))
# relative abundance
ps1.fcs.fam.r <- microbiome::transform(ps1.fcs.fam, "compositional")
# select top 35 by summed rel. abund.
ps1.fcs.fam.abund <- abundances(ps1.fcs.fam.r)
ps1.fcs.fam.abund.m <- reshape2::melt(ps1.fcs.fam.abund)
colnames(ps1.fcs.fam.abund.m) <- c("FAM", "Description", "rel_abund")
ps1.fcs.fam.sum <- ddply(ps1.fcs.fam.abund.m, .(FAM), summarise, mean = mean(rel_abund))
ps1.fcs.fam.tax$reads <- taxa_sums(ps1.fcs.fam) # total reads per genus
ps1.fcs.fam.tax$sum_abund <- taxa_sums(ps1.fcs.fam.r) # summed relative abund.
ps1.fcs.fam.tax$mean_abund <- ps1.fcs.fam.sum$mean # mean relative abundance
# top 35
ps1.fcs.fam.top35 <- top_n(ps1.fcs.fam.tax, n = 35, wt = sum_abund)
ps1.fcs.fam.top35$FAM <- as.factor(ps1.fcs.fam.top35$FAM)
# top 35 with phy_tree
ps1.fcs.fam35.r <- subset_taxa(ps1.fcs.fam.r, FAM %in% ps1.fcs.fam.top35$FAM)
theme_hm <- theme_classic() + theme(
axis.text.y = element_text(colour = 'black', face = 'italic'),
legend.key = element_blank(), text = element_text(size = 20),
panel.spacing.x=unit(0, "lines"),
strip.background = element_rect(colour="black", fill="white"))
# create heatmap plot data
fam35.hm.df <- plot_heatmap(ps1.fcs.fam35.r, method = "CAP", distance = "bray",
formula = ~ Day_of_study_factor)$data
# clean up tax names
fam35.hm.df$taxname <- ifelse(fam35.hm.df$Family == "f__",
yes = paste(fam35.hm.df$Class, "(unassigned)"),
no = paste(fam35.hm.df$Family))
fam35.hm.df$taxname <- gsub(fam35.hm.df$taxname, pattern = "[a-z]__",
replacement = "")
# summarise per pen per timepoint
fam35.hm.sum <- ddply(fam35.hm.df, .(Day_of_study_factor, Origin, Pen_no_pre_weaning,
taxname, FAM),
summarise, median = median(Abundance),
mean = mean(Abundance))
fam35.hm.sum$group <- interaction(fam35.hm.sum$Origin,
fam35.hm.sum$Day_of_study_factor, fam35.hm.sum$Pen_no_pre_weaning,
drop=T)
fam35.hm.sum$taxname <- as.factor(fam35.hm.sum$taxname)
# Bray-curtis distance matrix for taxa clustering
fam35.hm.cast <- reshape2::dcast(fam35.hm.sum[,c(4,6,8)], taxname ~ group,
value.var = "median")
rownames(fam35.hm.cast) <- fam35.hm.cast[,1]
fam35.hm.mat <- as.matrix(fam35.hm.cast[,c(2:129)])
bray35.sp <- vegdist(fam35.hm.mat, method = "bray")
# reorder levels by clustering
sp.order <- hclust(bray35.sp)$order
fam35.hm.sum$taxname <- factor(fam35.hm.sum$taxname,
levels(fam35.hm.sum$taxname)[sp.order])
levels(fam35.hm.sum$taxname)
## [1] "Akkermansiaceae" "Corynebacteriaceae"
## [3] "Desulfovibrionaceae" "Tannerellaceae"
## [5] "p-2534-18B5_gut_group" "Pirellulaceae"
## [7] "Synergistaceae" "Clostridiales_vadinBB60_group"
## [9] "Marinifilaceae" "Atopobiaceae"
## [11] "Eggerthellaceae" "Streptococcaceae"
## [13] "Succinivibrionaceae" "Lactobacillaceae"
## [15] "Prevotellaceae" "Lachnospiraceae"
## [17] "Ruminococcaceae" "Muribaculaceae"
## [19] "Veillonellaceae" "Christensenellaceae"
## [21] "Methanobacteriaceae" "Clostridiaceae_1"
## [23] "Peptostreptococcaceae" "Spirochaetaceae"
## [25] "Acidaminococcaceae" "Erysipelotrichaceae"
## [27] "Family_XIII" "Rikenellaceae"
## [29] "Enterobacteriaceae" "Bacteroidaceae"
## [31] "Fusobacteriaceae" "Enterococcaceae"
## [33] "Campylobacteraceae" "Actinomycetaceae"
## [35] "Pasteurellaceae"
With Bray-Curtis complete-linkage clustering of taxa.
theme_hm <- theme_classic() + theme(
axis.text.y = element_text(colour = 'black', face = 'italic'),
axis.text.x = element_text(size = 10, colour = 'white', face = 'italic'),
legend.key = element_blank(), text = element_text(size = 20),
panel.spacing.x=unit(0, "lines"),
strip.background = element_rect(colour="black", fill="white"))
p.fam35.hm <- ggplot(fam35.hm.sum, aes(x=Pen_no_pre_weaning, y=taxname)) +
geom_tile(aes(fill=mean)) +
scale_fill_viridis("Rel. abundance", option = "E",
na.value = "black", trans="log10") +
facet_grid(~Day_of_study_factor, scales = "free") +
labs(x="Means per pen",y=NULL) +
scale_y_discrete(position = "right") +
theme_hm
p.fam35.hm
## Warning: Transformation introduced infinite values in discrete y-axis
pdf(file = "./figures/Heatmap_families.pdf", height = 9, width = 35)
p.fam35.hm
## Warning: Transformation introduced infinite values in discrete y-axis
dev.off() #The final figure can be found as Supplementary Figure S6 of the manuscript
## png
## 2
Figures were generated using code below, and were then manually exported. This is why they are not showing up in the ‘figures’ folder. The unprocessed figures can be found under their individual folders.
# Rotate input file from folder 'input_data_flow_cytometry' to perform the analysis for other immunological parameters:
my_data <- read.table("./input_data_flow_cytometry/Flow_cytometry_DC_MLN_CDC1_CD8086_BG.csv",
header = TRUE,
sep = ",",
stringsAsFactors = FALSE)
my_data$id <- as.factor(my_data$id)
my_data$group <- as.factor(my_data$group)
my_data$time <- as.factor(my_data$time)
# Load the data
data("genderweight", package = "datarium")
# Show a sample of the data by group
set.seed(123)
my_data %>% sample_n_by(group, size = 2)
## # A tibble: 4 x 4
## id group time score
## <fct> <fct> <fct> <dbl>
## 1 7 Control t2 305
## 2 3 Control t3 157
## 3 6 Y_glucan t2 191
## 4 3 Y_glucan t1 80.9
my_data %>%
group_by(group) %>%
get_summary_stats(score, type = "mean_sd")
## # A tibble: 2 x 5
## group variable n mean sd
## <fct> <chr> <dbl> <dbl> <dbl>
## 1 Control score 24 132. 72.1
## 2 Y_glucan score 23 146. 74.2
# Save the data in two different vector
Y_glucan <- my_data %>%
filter(group == "Y_glucan") %>%
pull(score)
Control <- my_data %>%
filter(group == "Control") %>%
pull(score)
# Compute t-test
res <- t.test(Y_glucan, Control)
res
##
## Welch Two Sample t-test
##
## data: Y_glucan and Control
## t = 0.63763, df = 44.771, p-value = 0.527
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -29.39358 56.62040
## sample estimates:
## mean of x mean of y
## 146.0217 132.4083
# Levene
levene_test(score ~ group, data = my_data)
## # A tibble: 1 x 4
## df1 df2 statistic p
## <int> <int> <dbl> <dbl>
## 1 1 45 0.0910 0.764
# Compute t-test
res <- t.test(score ~ group, data = my_data)
res
##
## Welch Two Sample t-test
##
## data: score by group
## t = -0.63763, df = 44.771, p-value = 0.527
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -56.62040 29.39358
## sample estimates:
## mean in group Control mean in group Y_glucan
## 132.4083 146.0217
stat.test <- my_data %>%
t_test(score ~ group) %>%
add_significance()
stat.test
## # A tibble: 1 x 9
## .y. group1 group2 n1 n2 statistic df p p.signif
## <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <chr>
## 1 score Control Y_glucan 24 24 -0.638 44.8 0.527 ns
my_data %>%
t_test(score ~ group, detailed = TRUE) %>%
add_significance()
## # A tibble: 1 x 16
## estimate estimate1 estimate2 .y. group1 group2 n1 n2 statistic p
## <dbl> <dbl> <dbl> <chr> <chr> <chr> <int> <int> <dbl> <dbl>
## 1 -13.6 132. 146. score Contr~ Y_glu~ 24 24 -0.638 0.527
## # ... with 6 more variables: df <dbl>, conf.low <dbl>, conf.high <dbl>,
## # method <chr>, alternative <chr>, p.signif <chr>
my_data %>% cohens_d(score ~ group, var.equal = FALSE)
## # A tibble: 1 x 7
## .y. group1 group2 effsize n1 n2 magnitude
## * <chr> <chr> <chr> <dbl> <int> <int> <ord>
## 1 score Control Y_glucan -0.186 24 24 negligible
# Create a box-plot
bxp <- ggboxplot(
my_data, x = "group", y = "score",
ylab = "score", xlab = "Groups", add = "jitter"
)
# Add p-value and significance levels
stat.test <- stat.test %>% add_xy_position(x = "group")
bxp +
stat_pvalue_manual(stat.test, tip.length = 0) +
labs(subtitle = get_test_label(stat.test, detailed = TRUE))
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
## Warning: Removed 1 rows containing missing values (geom_point).
# Rotate input file from folder 'input_data_flow_cytometry' to perform the analysis for other immunological parameters:
my_data <- read.table("./input_data_flow_cytometry/Flow_cytometry_DC_MLN_CDC1_CD8086_BG.csv",
header = TRUE,
sep = ",",
stringsAsFactors = FALSE)
my_data$id <- as.factor(my_data$id)
my_data$group <- as.factor(my_data$group)
my_data$time <- as.factor(my_data$time)
# Inspect some random rows of the data by groups
set.seed(123)
my_data %>% sample_n_by(group, time, size = 1)
## # A tibble: 6 x 4
## id group time score
## <fct> <fct> <fct> <dbl>
## 1 7 Control t1 69.4
## 2 7 Control t2 305
## 3 3 Control t3 157
## 4 6 Y_glucan t1 25.7
## 5 3 Y_glucan t2 179
## 6 2 Y_glucan t3 186
#Extreme outliers are removed from the analysis.
my_data %>%
group_by(time) %>%
identify_outliers(score)
## # A tibble: 1 x 6
## time id group score is.outlier is.extreme
## <fct> <fct> <fct> <dbl> <lgl> <lgl>
## 1 t3 1 Control 274 TRUE FALSE
#Jitter
my_data$group <- factor(my_data$group, levels = c("Control", "Y_glucan", "Control_med", "Y_glucan_med"))
my_data %>%
filter(group %in% c("Control", "Y_glucan", "Control_med", "Y_glucan_med" )) %>%
ggplot(aes(x=time, y=score, fill = factor(group))) +
geom_boxplot() +
labs(fill="group") +
scale_fill_manual(values = c("#E69F00", "#669966", "#CCCCCC", "#999999")) +
geom_point(position=position_jitterdodge(),alpha=0.5, size = 5)
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
## Warning: Removed 1 rows containing missing values (geom_point).
# Two-way ANOVA
# http://rstudio-pubs-static.s3.amazonaws.com/476761_eb23c47a75d6447688cb793ac8a3e461.html
res.aov2 <- aov(score ~ group + time, data = my_data)
summary(res.aov2)
## Df Sum Sq Mean Sq F value Pr(>F)
## group 1 2177 2177 0.872 0.356
## time 2 133227 66613 26.674 2.93e-08 ***
## Residuals 43 107385 2497
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 1 observation deleted due to missingness
# Two-way ANOVA with interaction effect
# These two calls are equivalent
res.aov3 <- aov(score ~ group * time, data = my_data)
res.aov3 <- aov(score ~ group + time + group:time, data = my_data)
summary(res.aov3)
## Df Sum Sq Mean Sq F value Pr(>F)
## group 1 2177 2177 0.853 0.361
## time 2 133227 66613 26.109 4.87e-08 ***
## group:time 2 2779 1389 0.545 0.584
## Residuals 41 104606 2551
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 1 observation deleted due to missingness
require("dplyr")
group_by(my_data, group, time) %>%
summarise(
count = n(),
mean = mean(score, na.rm = TRUE),
sd = sd(score, na.rm = TRUE)
)
## `summarise()` regrouping output by 'group' (override with `.groups` argument)
## # A tibble: 6 x 5
## # Groups: group [2]
## group time count mean sd
## <fct> <fct> <int> <dbl> <dbl>
## 1 Control t1 8 63.0 21.6
## 2 Control t2 8 168. 75.2
## 3 Control t3 8 166 51.9
## 4 Y_glucan t1 8 60.4 24.1
## 5 Y_glucan t2 8 200. 63.4
## 6 Y_glucan t3 8 167. 40.0
model.tables(res.aov2, type="means", se = TRUE)
## Design is unbalanced - use se.contrast() for se's
## Tables of means
## Grand mean
##
## 139.0702
##
## group
## Control Y_glucan
## 132.4 146
## rep 24.0 23
##
## time
## t1 t2 t3
## 62.06 183.9 166.4
## rep 15.00 16.0 16.0
TukeyHSD(res.aov2, which = "time")
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = score ~ group + time, data = my_data)
##
## $time
## diff lwr upr p adj
## t2-t1 121.8554 78.25793 165.45284 0.0000001
## t3-t1 104.3554 60.75793 147.95284 0.0000020
## t3-t2 -17.5000 -60.38851 25.38851 0.5867905
# 1. Homogeneity of variances
plot(res.aov3, 1)
library(car)
leveneTest(score ~ group*time, data = my_data)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 5 0.8258 0.5387
## 41
# 2. Normality
plot(res.aov3, 2)
# 3. Skewness
#Skewness between -2 and +2 acceptable (George and Malley, 2010).
library(moments)
skewness(my_data$score, na.rm = TRUE)
## [1] 0.641101
# Extract the residuals
aov_residuals <- residuals(object = res.aov3)
# Run Shapiro-Wilk test
shapiro.test(x = aov_residuals)
##
## Shapiro-Wilk normality test
##
## data: aov_residuals
## W = 0.88361, p-value = 0.000225
# Rotate input file from folder 'input_data_restimulation_assay' to perform the analysis for other immunological parameters:
my_data <- read.table("./input_data_restimulation_assay/Restimulation_Assay_MLN_IL10_CONA5.csv",
header = TRUE,
sep = ",",
stringsAsFactors = FALSE)
my_data$id <- as.factor(my_data$id)
my_data$group <- as.factor(my_data$group)
my_data$time <- as.factor(my_data$time)
# Load the data
data("genderweight", package = "datarium")
# Show a sample of the data by group
set.seed(123)
my_data %>% sample_n_by(group, size = 2)
## # A tibble: 4 x 4
## id group time score
## <fct> <fct> <fct> <dbl>
## 1 7 Control t2 2852.
## 2 3 Control t3 719.
## 3 6 Y_glucan t2 1293.
## 4 3 Y_glucan t1 692.
my_data %>%
group_by(group) %>%
get_summary_stats(score, type = "mean_sd")
## # A tibble: 2 x 5
## group variable n mean sd
## <fct> <chr> <dbl> <dbl> <dbl>
## 1 Control score 23 1099. 870.
## 2 Y_glucan score 23 1267. 881.
# Save the data in two different vector
Y_glucan <- my_data %>%
filter(group == "Y_glucan") %>%
pull(score)
Control <- my_data %>%
filter(group == "Control") %>%
pull(score)
# Compute t-test
res <- t.test(Y_glucan, Control)
res
##
## Welch Two Sample t-test
##
## data: Y_glucan and Control
## t = 0.65064, df = 43.993, p-value = 0.5187
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -352.4139 688.4389
## sample estimates:
## mean of x mean of y
## 1267.478 1099.465
# Levene
levene_test(score ~ group, data = my_data)
## # A tibble: 1 x 4
## df1 df2 statistic p
## <int> <int> <dbl> <dbl>
## 1 1 44 0.0207 0.886
# Compute t-test
res <- t.test(score ~ group, data = my_data)
res
##
## Welch Two Sample t-test
##
## data: score by group
## t = -0.65064, df = 43.993, p-value = 0.5187
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -688.4389 352.4139
## sample estimates:
## mean in group Control mean in group Y_glucan
## 1099.465 1267.478
stat.test <- my_data %>%
t_test(score ~ group) %>%
add_significance()
stat.test
## # A tibble: 1 x 9
## .y. group1 group2 n1 n2 statistic df p p.signif
## <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <chr>
## 1 score Control Y_glucan 24 24 -0.651 44.0 0.519 ns
my_data %>%
t_test(score ~ group, detailed = TRUE) %>%
add_significance()
## # A tibble: 1 x 16
## estimate estimate1 estimate2 .y. group1 group2 n1 n2 statistic p
## <dbl> <dbl> <dbl> <chr> <chr> <chr> <int> <int> <dbl> <dbl>
## 1 -168. 1099. 1267. score Contr~ Y_glu~ 24 24 -0.651 0.519
## # ... with 6 more variables: df <dbl>, conf.low <dbl>, conf.high <dbl>,
## # method <chr>, alternative <chr>, p.signif <chr>
my_data %>% cohens_d(score ~ group, var.equal = FALSE)
## # A tibble: 1 x 7
## .y. group1 group2 effsize n1 n2 magnitude
## * <chr> <chr> <chr> <dbl> <int> <int> <ord>
## 1 score Control Y_glucan -0.192 24 24 negligible
# Create a box-plot
bxp <- ggboxplot(
my_data, x = "group", y = "score",
ylab = "score", xlab = "Groups", add = "jitter"
)
# Add p-value and significance levels
stat.test <- stat.test %>% add_xy_position(x = "group")
bxp +
stat_pvalue_manual(stat.test, tip.length = 0) +
labs(subtitle = get_test_label(stat.test, detailed = TRUE))
## Warning: Removed 2 rows containing non-finite values (stat_boxplot).
## Warning: Removed 2 rows containing missing values (geom_point).
Restimulation assay - two-way ANOVA
# Rotate input file from folder 'input_data_restimulation_assay' to perform the analysis for other immunological parameters:
my_data <- read.table("./input_data_restimulation_assay/Restimulation_Assay_MLN_IL10_CONA5.csv",
header = TRUE,
sep = ",",
stringsAsFactors = FALSE)
my_data$id <- as.factor(my_data$id)
my_data$group <- as.factor(my_data$group)
my_data$time <- as.factor(my_data$time)
# Inspect some random rows of the data by groups
set.seed(123)
my_data %>% sample_n_by(group, time, size = 1)
## # A tibble: 6 x 4
## id group time score
## <fct> <fct> <fct> <dbl>
## 1 7 Control t1 991.
## 2 7 Control t2 2852.
## 3 3 Control t3 719.
## 4 6 Y_glucan t1 1289.
## 5 3 Y_glucan t2 1955.
## 6 2 Y_glucan t3 381.
#Extreme outliers are removed from the analysis.
my_data %>%
group_by(time) %>%
identify_outliers(score)
## # A tibble: 2 x 6
## time id group score is.outlier is.extreme
## <fct> <fct> <fct> <dbl> <lgl> <lgl>
## 1 t1 1 Y_glucan 2231. TRUE FALSE
## 2 t3 1 Y_glucan 1545. TRUE FALSE
#Jitter
my_data$group <- factor(my_data$group, levels = c("Control", "Y_glucan", "Control_med", "Y_glucan_med"))
my_data %>%
filter(group %in% c("Control", "Y_glucan", "Control_med", "Y_glucan_med" )) %>%
ggplot(aes(x=time, y=score, fill = factor(group))) +
geom_boxplot() +
labs(fill="group") +
scale_fill_manual(values = c("#E69F00", "#669966", "#CCCCCC", "#999999")) +
geom_point(position=position_jitterdodge(),alpha=0.5, size = 5)
## Warning: Removed 2 rows containing non-finite values (stat_boxplot).
## Warning: Removed 2 rows containing missing values (geom_point).
# Two-way ANOVA
# http://rstudio-pubs-static.s3.amazonaws.com/476761_eb23c47a75d6447688cb793ac8a3e461.html
res.aov2 <- aov(score ~ group + time, data = my_data)
summary(res.aov2)
## Df Sum Sq Mean Sq F value Pr(>F)
## group 1 324624 324624 0.701 0.407
## time 2 14285532 7142766 15.420 9.52e-06 ***
## Residuals 42 19455387 463223
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 2 observations deleted due to missingness
# Two-way ANOVA with interaction effect
# These two calls are equivalent
res.aov3 <- aov(score ~ group * time, data = my_data)
res.aov3 <- aov(score ~ group + time + group:time, data = my_data)
summary(res.aov3)
## Df Sum Sq Mean Sq F value Pr(>F)
## group 1 324624 324624 0.686 0.412
## time 2 14285532 7142766 15.095 1.31e-05 ***
## group:time 2 527873 263936 0.558 0.577
## Residuals 40 18927514 473188
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 2 observations deleted due to missingness
require("dplyr")
group_by(my_data, group, time) %>%
summarise(
count = n(),
mean = mean(score, na.rm = TRUE),
sd = sd(score, na.rm = TRUE)
)
## `summarise()` regrouping output by 'group' (override with `.groups` argument)
## # A tibble: 6 x 5
## # Groups: group [2]
## group time count mean sd
## <fct> <fct> <int> <dbl> <dbl>
## 1 Control t1 8 679. 438.
## 2 Control t2 8 1928. 958.
## 3 Control t3 8 633. 279.
## 4 Y_glucan t1 8 1166. 590.
## 5 Y_glucan t2 8 1939. 1034.
## 6 Y_glucan t3 8 685. 401.
model.tables(res.aov2, type="means", se = TRUE)
## Design is unbalanced - use se.contrast() for se's
## Tables of means
## Grand mean
##
## 1183.471
##
## group
## Control Y_glucan
## 1099 1267
## rep 23 23
##
## time
## t1 t2 t3
## 912 1933 655.2
## rep 15 16 15.0
TukeyHSD(res.aov2, which = "time")
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = score ~ group + time, data = my_data)
##
## $time
## diff lwr upr p adj
## t2-t1 1021.1610 426.8876 1615.4345 0.0004243
## t3-t1 -256.8019 -860.5843 346.9805 0.5603017
## t3-t2 -1277.9629 -1872.2364 -683.6895 0.0000151
# 1. Homogeneity of variances
plot(res.aov3, 1)
library(car)
leveneTest(score ~ group*time, data = my_data)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 5 2.7583 0.03119 *
## 40
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# 2. Normality
plot(res.aov3, 2)
# 3. Skewness
#Skewness between -2 and +2 acceptable (George and Malley, 2010).
library(moments)
skewness(my_data$score, na.rm = TRUE)
## [1] 1.011202
# Extract the residuals
aov_residuals <- residuals(object = res.aov3)
# Run Shapiro-Wilk test
shapiro.test(x = aov_residuals)
##
## Shapiro-Wilk normality test
##
## data: aov_residuals
## W = 0.96949, p-value = 0.2653
# Rotate input file from folder 'input_data_vaccination_response' to perform the analysis for other immunological parameters:
my_data <- read.table("./input_data_vaccination_response/Vaccination_Salmonella_IgA_BG.csv",
header = TRUE,
sep = ",",
stringsAsFactors = FALSE)
my_data$id <- as.factor(my_data$id)
my_data$group <- as.factor(my_data$group)
my_data$time <- as.factor(my_data$time)
# Load the data
data("genderweight", package = "datarium")
# Show a sample of the data by group
set.seed(123)
my_data %>% sample_n_by(group, size = 2)
## # A tibble: 4 x 4
## id group time score
## <fct> <fct> <fct> <dbl>
## 1 15 Control t2 0.226
## 2 15 Control t1 0.0893
## 3 14 Y_Glucan t1 0.0675
## 4 3 Y_Glucan t1 0.144
my_data %>%
group_by(group) %>%
get_summary_stats(score, type = "mean_sd")
## # A tibble: 2 x 5
## group variable n mean sd
## <fct> <chr> <dbl> <dbl> <dbl>
## 1 Control score 47 0.391 0.344
## 2 Y_Glucan score 45 0.342 0.249
# Save the data in two different vector
Y_glucan <- my_data %>%
filter(group == "Y_Glucan") %>%
pull(score)
Control <- my_data %>%
filter(group == "Control") %>%
pull(score)
# Compute t-test
res <- t.test(Y_glucan, Control)
res
##
## Welch Two Sample t-test
##
## data: Y_glucan and Control
## t = -0.78629, df = 83.872, p-value = 0.4339
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.17303530 0.07497459
## sample estimates:
## mean of x mean of y
## 0.3420867 0.3911170
# Levene
levene_test(score ~ group, data = my_data)
## # A tibble: 1 x 4
## df1 df2 statistic p
## <int> <int> <dbl> <dbl>
## 1 1 90 2.61 0.110
# Compute t-test
res <- t.test(score ~ group, data = my_data)
res
##
## Welch Two Sample t-test
##
## data: score by group
## t = 0.78629, df = 83.872, p-value = 0.4339
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.07497459 0.17303530
## sample estimates:
## mean in group Control mean in group Y_Glucan
## 0.3911170 0.3420867
stat.test <- my_data %>%
t_test(score ~ group) %>%
add_significance()
stat.test
## # A tibble: 1 x 9
## .y. group1 group2 n1 n2 statistic df p p.signif
## <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <chr>
## 1 score Control Y_Glucan 48 48 0.786 83.9 0.434 ns
my_data %>%
t_test(score ~ group, detailed = TRUE) %>%
add_significance()
## # A tibble: 1 x 16
## estimate estimate1 estimate2 .y. group1 group2 n1 n2 statistic p
## <dbl> <dbl> <dbl> <chr> <chr> <chr> <int> <int> <dbl> <dbl>
## 1 0.0490 0.391 0.342 score Contr~ Y_Glu~ 48 48 0.786 0.434
## # ... with 6 more variables: df <dbl>, conf.low <dbl>, conf.high <dbl>,
## # method <chr>, alternative <chr>, p.signif <chr>
my_data %>% cohens_d(score ~ group, var.equal = FALSE)
## # A tibble: 1 x 7
## .y. group1 group2 effsize n1 n2 magnitude
## * <chr> <chr> <chr> <dbl> <int> <int> <ord>
## 1 score Control Y_Glucan 0.163 48 48 negligible
# Create a box-plot
bxp <- ggboxplot(
my_data, x = "group", y = "score",
ylab = "score", xlab = "Groups", add = "jitter"
)
# Add p-value and significance levels
stat.test <- stat.test %>% add_xy_position(x = "group")
bxp +
stat_pvalue_manual(stat.test, tip.length = 0) +
labs(subtitle = get_test_label(stat.test, detailed = TRUE))
## Warning: Removed 4 rows containing non-finite values (stat_boxplot).
## Warning: Removed 4 rows containing missing values (geom_point).
Vaccination response - two-way ANOVA
# Rotate input file from folder 'input_data_vaccination_response' to perform the analysis for other immunological parameters:
my_data <- read.table("./input_data_vaccination_response/Vaccination_Salmonella_IgA_BG.csv",
header = TRUE,
sep = ",",
stringsAsFactors = FALSE)
my_data$id <- as.factor(my_data$id)
my_data$group <- as.factor(my_data$group)
my_data$time <- as.factor(my_data$time)
# Inspect some random rows of the data by groups
set.seed(123)
my_data %>% sample_n_by(group, time, size = 1)
## # A tibble: 6 x 4
## id group time score
## <fct> <fct> <fct> <dbl>
## 1 15 Control t1 0.0893
## 2 15 Control t2 0.226
## 3 3 Control t3 1.01
## 4 14 Y_Glucan t1 0.0675
## 5 3 Y_Glucan t2 0.222
## 6 10 Y_Glucan t3 0.509
#Extreme outliers are removed from the analysis.
my_data %>%
group_by(time) %>%
identify_outliers(score)
## # A tibble: 4 x 6
## time id group score is.outlier is.extreme
## <fct> <fct> <fct> <dbl> <lgl> <lgl>
## 1 t1 4 Y_Glucan 0.238 TRUE FALSE
## 2 t1 5 Y_Glucan 0.304 TRUE FALSE
## 3 t1 11 Y_Glucan 0.256 TRUE FALSE
## 4 t2 16 Control 1.18 TRUE FALSE
#Jitter
my_data$group <- factor(my_data$group, levels = c("Control", "Y_Glucan", "Control_med", "Y_glucan_med"))
my_data %>%
filter(group %in% c("Control", "Y_glucan", "Control_med", "Y_glucan_med" )) %>%
ggplot(aes(x=time, y=score, fill = factor(group))) +
geom_boxplot() +
labs(fill="group") +
scale_fill_manual(values = c("#E69F00", "#669966", "#CCCCCC", "#999999")) +
geom_point(position=position_jitterdodge(),alpha=0.5, size = 5)
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
## Warning: Removed 1 rows containing missing values (geom_point).
# Two-way ANOVA
# http://rstudio-pubs-static.s3.amazonaws.com/476761_eb23c47a75d6447688cb793ac8a3e461.html
res.aov2 <- aov(score ~ group + time, data = my_data)
summary(res.aov2)
## Df Sum Sq Mean Sq F value Pr(>F)
## group 1 0.055 0.0553 1.032 0.312
## time 2 3.444 1.7219 32.156 3.29e-11 ***
## Residuals 88 4.712 0.0535
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 4 observations deleted due to missingness
# Two-way ANOVA with interaction effect
# These two calls are equivalent
res.aov3 <- aov(score ~ group * time, data = my_data)
res.aov3 <- aov(score ~ group + time + group:time, data = my_data)
summary(res.aov3)
## Df Sum Sq Mean Sq F value Pr(>F)
## group 1 0.055 0.0553 1.039 0.311
## time 2 3.444 1.7219 32.384 3.28e-11 ***
## group:time 2 0.140 0.0698 1.313 0.274
## Residuals 86 4.573 0.0532
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 4 observations deleted due to missingness
require("dplyr")
group_by(my_data, group, time) %>%
summarise(
count = n(),
mean = mean(score, na.rm = TRUE),
sd = sd(score, na.rm = TRUE)
)
## `summarise()` regrouping output by 'group' (override with `.groups` argument)
## # A tibble: 6 x 5
## # Groups: group [2]
## group time count mean sd
## <fct> <fct> <int> <dbl> <dbl>
## 1 Control t1 16 0.0955 0.0225
## 2 Control t2 16 0.397 0.279
## 3 Control t3 16 0.662 0.343
## 4 Y_Glucan t1 16 0.138 0.0816
## 5 Y_Glucan t2 16 0.372 0.189
## 6 Y_Glucan t3 16 0.516 0.272
model.tables(res.aov2, type="means", se = TRUE)
## Design is unbalanced - use se.contrast() for se's
## Tables of means
## Grand mean
##
## 0.3671348
##
## group
## Control Y_Glucan
## 0.3911 0.3421
## rep 47.0000 45.0000
##
## time
## t1 t2 t3
## 0.1173 0.3845 0.5915
## rep 30.0000 31.0000 31.0000
TukeyHSD(res.aov2, which = "time")
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = score ~ group + time, data = my_data)
##
## $time
## diff lwr upr p adj
## t2-t1 0.2672593 0.12597045 0.4085482 0.0000589
## t3-t1 0.4742513 0.33296238 0.6155402 0.0000000
## t3-t2 0.2069919 0.06686593 0.3471179 0.0019546
# 1. Homogeneity of variances
plot(res.aov3, 1)
library(car)
leveneTest(score ~ group*time, data = my_data)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 5 6.1382 6.558e-05 ***
## 86
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# 2. Normality
plot(res.aov3, 2)
# 3. Skewness
#Skewness between -2 and +2 acceptable (George and Malley, 2010).
library(moments)
skewness(my_data$score, na.rm = TRUE)
## [1] 1.179768
# Extract the residuals
aov_residuals <- residuals(object = res.aov3)
# Run Shapiro-Wilk test
shapiro.test(x = aov_residuals)
##
## Shapiro-Wilk normality test
##
## data: aov_residuals
## W = 0.94218, p-value = 0.0004861
ile <- readRDS("./phyobjects/ps1.decontam.rds")
ile <- subset_samples(ile, Day_of_study_factor == "Day_27")
ile <- subset_samples(ile, Origin == "ileum_digesta")
ile <- microbiome::aggregate_taxa(ile, "Genus")
ile.r <- microbiome::transform(ile, "compositional")
#Prevalence filter:
ile.r <- filter_taxa(ile.r, function(x) sum(x > 0.001) > (0.50*length(x)), TRUE)
OTU1 = as(otu_table(ile.r), "matrix")
# transpose if necessary
if(taxa_are_rows(ile.r)){OTU1 <- t(OTU1)}
# Coerce to data.frame
OTUdf = as.data.frame(OTU1)
write.csv(OTUdf, file = "./output_data/OTUdf_all_treatments_day27_ileum_genera_filter_sum0.001_prev0.5.csv") #this file was used to manually combine genera abundances with day 27 ileum sample data
my_data <- read.table("./input_data/ileum_metadata_plus_genera_abundances_for_correlation_figure.csv",
header = TRUE,
sep = ",",
stringsAsFactors = FALSE)
my_data <- my_data
head(my_data, 6)
## Weight_birth Weight_day_14 Weight_at_day_26 pH_jejunum pH_ileum pH_caecum
## 1 1.28 4.38 7.9 6.06 7.11 6.18
## 2 1.68 5.90 10.0 7.10 8.40 6.32
## 3 2.13 4.67 7.8 7.05 8.03 6.08
## 4 1.85 5.73 NA 6.84 8.14 6.43
## 5 1.23 4.25 7.8 NA NA 6.10
## 6 2.07 6.12 9.6 7.66 8.71 6.52
## pH_colon CTL_perc_MLN_d27 CTL_ki67_MLN_d27 MemoryT_perc_MLN_d27
## 1 6.51 21.3 26.4 5.81
## 2 6.95 19.5 21.4 7.30
## 3 7.31 15.7 20.2 5.77
## 4 5.94 17.9 28.1 2.40
## 5 6.86 17.9 29.2 4.32
## 6 5.79 24.6 29.3 2.91
## MemoryT_perc_CD4highCD8low_MLN_d27 MemoryT_Ki67_CD4highCD8low_MLN_d27
## 1 43.1 45.5
## 2 73.4 28.8
## 3 54.5 37.0
## 4 76.0 51.9
## 5 52.0 35.4
## 6 55.2 39.6
## MemoryT_perc_CD4lowCD8high_MLN_d27 MemoryT_Ki67_CD4lowCD8high_MLN_d27
## 1 55.1 36.8
## 2 24.5 24.8
## 3 43.5 24.0
## 4 21.5 28.3
## 5 43.3 49.8
## 6 41.1 41.9
## HelperT_perc_MLN_d27 HelperT_Ki67_MLN_d27 Tregs_perc_MLN_d27
## 1 68.1 18.3 7.26
## 2 65.3 22.1 10.80
## 3 72.3 13.8 7.82
## 4 73.9 13.6 5.74
## 5 69.7 13.6 5.88
## 6 66.1 23.4 10.10
## Gammadelta_perc_MLN_d27 Gammadelta_Ki67_MLN_d27 cdc1_perc_MLN_d27
## 1 1.13 25.4 0.410
## 2 0.81 17.8 0.170
## 3 0.74 16.1 0.085
## 4 1.51 23.5 0.860
## 5 1.06 17.3 0.300
## 6 1.33 25.2 0.150
## cdc1_8086_MLN_d27 cdc2_perc_MLN_d27 cdc2_8086_MLN_d27 pdc_perc_MLN_d27
## 1 62.9 0.56 205 2.77
## 2 78.4 0.27 187 2.33
## 3 75.8 0.21 202 10.80
## 4 57.8 1.59 116 4.83
## 5 103.0 0.11 174 6.09
## 6 80.9 0.25 209 2.53
## pdc_8086_MLN_d27 mln_restimulation_il10_ConA_5_d27
## 1 127.0 2230.7110
## 2 65.5 777.3465
## 3 160.0 458.0275
## 4 142.0 189.4585
## 5 144.0 84.3885
## 6 130.0 691.9395
## mln_restimulation_il10_lps_10_d27 mln_restimulation_il10_medium_d27
## 1 21.7105 5.21850
## 2 13.7935 2.31525
## 3 5.7960 0.19950
## 4 9.9365 2.27500
## 5 12.3235 2.12975
## 6 30.1875 9.79300
## mln_restimulation_TNF_ConA_5_d27 mln_restimulation_TNF_LPS_10_d27
## 1 76.5730 8.9740
## 2 17.4125 4.2630
## 3 20.4855 3.6855
## 4 18.8195 2.3170
## 5 7.7980 0.9835
## 6 21.8295 3.8605
## mln_restimulation_TNF_medium_d27 Lactobacillus Streptococcus
## 1 7.59850 0.4396062 0.004990015
## 2 2.45000 0.5616382 0.073144223
## 3 1.91975 0.4489857 0.105183643
## 4 1.48400 0.7591559 0.020607325
## 5 1.34400 0.2453164 0.065894422
## 6 3.44050 0.5209840 0.039087397
## Clostridium_sensu_stricto_1 Romboutsia Terrisporobacter Turicibacter
## 1 0.08947352 0.1630176 0.02471804 0.25448293
## 2 0.04590598 0.1290964 0.07606056 0.07956653
## 3 0.06719298 0.1237184 0.08576225 0.05941798
## 4 0.02507603 0.1233235 0.05152122 0.01617940
## 5 0.25871272 0.2266204 0.08034537 0.06640560
## 6 0.11026820 0.1811106 0.03124736 0.08165567
## Veillonella Actinobacillus Haemophilus
## 1 0.000000000 0.000000000 0.013366670
## 2 0.008956175 0.001144223 0.007180876
## 3 0.005020589 0.015246009 0.037062541
## 4 0.000000000 0.001846910 0.000000000
## 5 0.007271007 0.010246979 0.026628045
## 6 0.001180438 0.000000000 0.007578169
# Summary of data
summary(my_data)
## Weight_birth Weight_day_14 Weight_at_day_26 pH_jejunum
## Min. :1.040 Min. :2.790 Min. : 5.030 Min. :6.060
## 1st Qu.:1.235 1st Qu.:4.290 1st Qu.: 7.800 1st Qu.:6.997
## Median :1.650 Median :5.000 Median : 8.575 Median :7.315
## Mean :1.548 Mean :4.839 Mean : 8.381 Mean :7.131
## 3rd Qu.:1.725 3rd Qu.:5.510 3rd Qu.: 9.400 3rd Qu.:7.407
## Max. :2.130 Max. :6.120 Max. :10.000 Max. :7.660
## NA's :1 NA's :3
## pH_ileum pH_caecum pH_colon CTL_perc_MLN_d27
## Min. :7.070 Min. :6.080 Min. :5.790 Min. :13.40
## 1st Qu.:7.300 1st Qu.:6.175 1st Qu.:6.470 1st Qu.:17.90
## Median :7.640 Median :6.390 Median :6.730 Median :19.60
## Mean :7.763 Mean :6.402 Mean :6.694 Mean :20.04
## 3rd Qu.:8.140 3rd Qu.:6.555 3rd Qu.:6.915 3rd Qu.:22.85
## Max. :8.710 Max. :7.010 Max. :7.730 Max. :26.40
## NA's :2 NA's :1
## CTL_ki67_MLN_d27 MemoryT_perc_MLN_d27 MemoryT_perc_CD4highCD8low_MLN_d27
## Min. :16.10 Min. :1.070 Min. :26.20
## 1st Qu.:24.02 1st Qu.:2.265 1st Qu.:52.40
## Median :27.25 Median :2.910 Median :57.00
## Mean :27.68 Mean :3.482 Mean :60.04
## 3rd Qu.:29.27 3rd Qu.:4.553 3rd Qu.:73.55
## Max. :45.20 Max. :7.300 Max. :90.50
## NA's :1 NA's :1 NA's :1
## MemoryT_Ki67_CD4highCD8low_MLN_d27 MemoryT_perc_CD4lowCD8high_MLN_d27
## Min. :28.80 Min. : 9.52
## 1st Qu.:37.00 1st Qu.:23.98
## Median :40.85 Median :39.40
## Mean :44.58 Mean :37.49
## 3rd Qu.:48.88 3rd Qu.:43.45
## Max. :69.80 Max. :71.30
## NA's :1 NA's :1
## MemoryT_Ki67_CD4lowCD8high_MLN_d27 HelperT_perc_MLN_d27 HelperT_Ki67_MLN_d27
## Min. :23.80 Min. :60.90 Min. :13.60
## 1st Qu.:30.27 1st Qu.:65.50 1st Qu.:15.28
## Median :42.85 Median :68.20 Median :18.80
## Mean :40.00 Mean :68.32 Mean :20.79
## 3rd Qu.:49.20 3rd Qu.:71.65 3rd Qu.:23.23
## Max. :55.80 Max. :73.90 Max. :33.20
## NA's :1 NA's :1 NA's :1
## Tregs_perc_MLN_d27 Gammadelta_perc_MLN_d27 Gammadelta_Ki67_MLN_d27
## Min. : 5.380 Min. :0.740 Min. :12.10
## 1st Qu.: 6.405 1st Qu.:1.175 1st Qu.:17.95
## Median : 8.030 Median :1.425 Median :22.75
## Mean : 8.475 Mean :1.678 Mean :23.34
## 3rd Qu.: 9.805 3rd Qu.:1.952 3rd Qu.:25.35
## Max. :14.500 Max. :4.140 Max. :44.80
## NA's :1 NA's :1 NA's :1
## cdc1_perc_MLN_d27 cdc1_8086_MLN_d27 cdc2_perc_MLN_d27 cdc2_8086_MLN_d27
## Min. :0.085 Min. : 25.7 Min. :0.110 Min. :116.0
## 1st Qu.:0.210 1st Qu.: 38.5 1st Qu.:0.285 1st Qu.:166.0
## Median :0.350 Median : 62.9 Median :0.520 Median :187.0
## Mean :0.371 Mean : 58.5 Mean :0.546 Mean :181.8
## 3rd Qu.:0.435 3rd Qu.: 72.6 3rd Qu.:0.665 3rd Qu.:200.5
## Max. :0.860 Max. :103.0 Max. :1.590 Max. :229.0
##
## pdc_perc_MLN_d27 pdc_8086_MLN_d27 mln_restimulation_il10_ConA_5_d27
## Min. : 1.490 Min. : 65.5 Min. : 84.39
## 1st Qu.: 2.650 1st Qu.:128.5 1st Qu.: 556.84
## Median : 4.620 Median :138.0 Median : 838.80
## Mean : 4.645 Mean :140.6 Mean : 854.96
## 3rd Qu.: 5.825 3rd Qu.:143.0 3rd Qu.: 973.74
## Max. :10.800 Max. :215.0 Max. :2230.71
## NA's :1
## mln_restimulation_il10_lps_10_d27 mln_restimulation_il10_medium_d27
## Min. : 0.000 Min. : 0.01925
## 1st Qu.: 6.465 1st Qu.: 2.28506
## Median :13.059 Median : 3.47637
## Mean :12.312 Mean : 5.66500
## 3rd Qu.:16.659 3rd Qu.: 8.89394
## Max. :30.188 Max. :17.21475
## NA's :1 NA's :1
## mln_restimulation_TNF_ConA_5_d27 mln_restimulation_TNF_LPS_10_d27
## Min. : 4.239 Min. : 0.9835
## 1st Qu.: 18.116 1st Qu.: 3.5332
## Median : 21.829 Median : 5.6840
## Mean : 43.060 Mean : 6.2064
## 3rd Qu.: 53.100 3rd Qu.: 7.6212
## Max. :174.409 Max. :18.9140
##
## mln_restimulation_TNF_medium_d27 Lactobacillus Streptococcus
## Min. : 1.344 Min. :0.1383 Min. :0.00225
## 1st Qu.: 2.185 1st Qu.:0.4170 1st Qu.:0.01484
## Median : 4.106 Median :0.5616 Median :0.02447
## Mean : 4.936 Mean :0.5801 Mean :0.05344
## 3rd Qu.: 6.857 3rd Qu.:0.7991 3rd Qu.:0.06952
## Max. :13.106 Max. :0.9066 Max. :0.21976
##
## Clostridium_sensu_stricto_1 Romboutsia Terrisporobacter
## Min. :0.00000 Min. :0.005936 Min. :0.007063
## 1st Qu.:0.02921 1st Qu.:0.033550 1st Qu.:0.027560
## Median :0.06719 Median :0.109685 Median :0.036324
## Mean :0.10622 Mean :0.098730 Mean :0.060560
## 3rd Qu.:0.18144 3rd Qu.:0.126407 3rd Qu.:0.083054
## Max. :0.28562 Max. :0.226620 Max. :0.183378
##
## Turicibacter Veillonella Actinobacillus Haemophilus
## Min. :0.00000 Min. :0.000000 Min. :0.000000 Min. :0.000000
## 1st Qu.:0.01423 1st Qu.:0.000000 1st Qu.:0.000000 1st Qu.:0.005349
## Median :0.05942 Median :0.001180 Median :0.001127 Median :0.007578
## Mean :0.06570 Mean :0.006790 Mean :0.002613 Mean :0.013364
## 3rd Qu.:0.08061 3rd Qu.:0.006146 3rd Qu.:0.001989 3rd Qu.:0.012141
## Max. :0.25448 Max. :0.052143 Max. :0.015246 Max. :0.057469
##
# Turn into numeric value
my_data$cdc2_8086_MLN_d27<- as.numeric(my_data$cdc2_8086_MLN_d27)
str(my_data)
## 'data.frame': 15 obs. of 40 variables:
## $ Weight_birth : num 1.28 1.68 2.13 1.85 1.23 2.07 1.68 1.04 1.23 1.24 ...
## $ Weight_day_14 : num 4.38 5.9 4.67 5.73 4.25 6.12 5.07 4 3.8 4.33 ...
## $ Weight_at_day_26 : num 7.9 10 7.8 NA 7.8 9.6 8.96 7.1 7.53 8.3 ...
## $ pH_jejunum : num 6.06 7.1 7.05 6.84 NA 7.66 7.32 7.31 NA 7.39 ...
## $ pH_ileum : num 7.11 8.4 8.03 8.14 NA 8.71 7.64 7.55 7.62 7.07 ...
## $ pH_caecum : num 6.18 6.32 6.08 6.43 6.1 6.52 6.39 6.59 7.01 6.16 ...
## $ pH_colon : num 6.51 6.95 7.31 5.94 6.86 5.79 6.73 6.73 6.75 6.51 ...
## $ CTL_perc_MLN_d27 : num 21.3 19.5 15.7 17.9 17.9 24.6 19.7 23.1 NA 16.2 ...
## $ CTL_ki67_MLN_d27 : num 26.4 21.4 20.2 28.1 29.2 29.3 16.1 28.5 NA 24.1 ...
## $ MemoryT_perc_MLN_d27 : num 5.81 7.3 5.77 2.4 4.32 2.91 3.63 4.63 NA 1.07 ...
## $ MemoryT_perc_CD4highCD8low_MLN_d27: num 43.1 73.4 54.5 76 52 55.2 46.7 75.4 NA 73.6 ...
## $ MemoryT_Ki67_CD4highCD8low_MLN_d27: num 45.5 28.8 37 51.9 35.4 39.6 35 69.8 NA 41.1 ...
## $ MemoryT_perc_CD4lowCD8high_MLN_d27: num 55.1 24.5 43.5 21.5 43.3 41.1 51 23 NA 23.8 ...
## $ MemoryT_Ki67_CD4lowCD8high_MLN_d27: num 36.8 24.8 24 28.3 49.8 41.9 23.8 50.6 NA 46.8 ...
## $ HelperT_perc_MLN_d27 : num 68.1 65.3 72.3 73.9 69.7 66.1 68.3 65.1 NA 73.9 ...
## $ HelperT_Ki67_MLN_d27 : num 18.3 22.1 13.8 13.6 13.6 23.4 14.4 17.9 NA 22.7 ...
## $ Tregs_perc_MLN_d27 : num 7.26 10.8 7.82 5.74 5.88 10.1 14.5 6.12 NA 12 ...
## $ Gammadelta_perc_MLN_d27 : num 1.13 0.81 0.74 1.51 1.06 1.33 1.31 1.51 NA 1.78 ...
## $ Gammadelta_Ki67_MLN_d27 : num 25.4 17.8 16.1 23.5 17.3 25.2 12.1 22 NA 27 ...
## $ cdc1_perc_MLN_d27 : num 0.41 0.17 0.085 0.86 0.3 0.15 0.22 0.38 0.54 0.41 ...
## $ cdc1_8086_MLN_d27 : num 62.9 78.4 75.8 57.8 103 80.9 51.4 33.4 33.4 25.7 ...
## $ cdc2_perc_MLN_d27 : num 0.56 0.27 0.21 1.59 0.11 0.25 0.3 0.43 0.63 0.85 ...
## $ cdc2_8086_MLN_d27 : num 205 187 202 116 174 209 146 146 229 158 ...
## $ pdc_perc_MLN_d27 : num 2.77 2.33 10.8 4.83 6.09 2.53 5.56 4.62 1.49 2.92 ...
## $ pdc_8086_MLN_d27 : num 127 65.5 160 142 144 130 139 138 121 133 ...
## $ mln_restimulation_il10_ConA_5_d27 : num 2230.7 777.3 458 189.5 84.4 ...
## $ mln_restimulation_il10_lps_10_d27 : num 21.71 13.79 5.8 9.94 12.32 ...
## $ mln_restimulation_il10_medium_d27 : num 5.22 2.32 0.2 2.27 2.13 ...
## $ mln_restimulation_TNF_ConA_5_d27 : num 76.6 17.4 20.5 18.8 7.8 ...
## $ mln_restimulation_TNF_LPS_10_d27 : num 8.974 4.263 3.686 2.317 0.984 ...
## $ mln_restimulation_TNF_medium_d27 : num 7.6 2.45 1.92 1.48 1.34 ...
## $ Lactobacillus : num 0.44 0.562 0.449 0.759 0.245 ...
## $ Streptococcus : num 0.00499 0.07314 0.10518 0.02061 0.06589 ...
## $ Clostridium_sensu_stricto_1 : num 0.0895 0.0459 0.0672 0.0251 0.2587 ...
## $ Romboutsia : num 0.163 0.129 0.124 0.123 0.227 ...
## $ Terrisporobacter : num 0.0247 0.0761 0.0858 0.0515 0.0803 ...
## $ Turicibacter : num 0.2545 0.0796 0.0594 0.0162 0.0664 ...
## $ Veillonella : num 0 0.00896 0.00502 0 0.00727 ...
## $ Actinobacillus : num 0 0.00114 0.01525 0.00185 0.01025 ...
## $ Haemophilus : num 0.01337 0.00718 0.03706 0 0.02663 ...
# P.adjust: Calculated using Benjamini-Hockberg (BH) method
cors<-cor(my_data)
cor.mtest <- function(mat, conf.level = 0.95) {
mat <- as.matrix(mat)
n <- ncol(mat)
p.mat <- lowCI.mat <- uppCI.mat <- matrix(NA, n, n)
diag(p.mat) <- 0
diag(lowCI.mat) <- diag(uppCI.mat) <- 1
for (i in 1:(n - 1)) {
for (j in (i + 1):n) {
tmp <- cor.test(mat[, i], mat[, j], conf.level = conf.level)
p.mat[i, j] <- p.mat[j, i] <- tmp$p.value
lowCI.mat[i, j] <- lowCI.mat[j, i] <- tmp$conf.int[1]
uppCI.mat[i, j] <- uppCI.mat[j, i] <- tmp$conf.int[2]
}
}
return(list(p.mat, lowCI.mat, uppCI.mat))
}
res1 <- cor.mtest(my_data, 0.95)
#You can vectorize your p-value matrix (e.g. res1), which has been returned by cor.mtest function. Afterwards, p.adjust function is used to get the adjusted p-values.
pAdj <- p.adjust(c(res1[[1]]), method = "BH")
#Then create the original matrix with adjusted p-values.
resAdj <- matrix(pAdj, ncol = dim(res1[[1]])[1])
#Then the corrplot function is used to create the plot containing adjusted p-values.
corrplot(cors, p.mat = resAdj, tl.col = "black", sig.level=0.05, insig="blank", cl.align="r", tl.cex=0.6, order="original", na.label="square", na.label.col = "white", type="lower", tl.srt=60, cl.ratio=0.1)
# Save as pdf
pdf("./figures/Correlation_plot.pdf", width=10, height=10)
corrplot(cors, p.mat = resAdj, sig.level=0.05, insig="blank", cl.align="r", tl.cex=0.6, order="original", na.label="square", na.label.col = "white", tl.col = "black", type="lower", tl.srt=60, cl.ratio=0.1)
dev.off() #The final figure can be found as Supplementary Figure S9 of the manuscript
## png
## 2
Figure was generated using code below, and was then manually exported. This is why it is not showing up in the ‘figures’ folder. The unprocessed figure can be found under its individual folder.
my_data <- read.table("./input_data/pH_of_GI_segments.csv",
header = TRUE,
sep = ",",
stringsAsFactors = FALSE)
# Convert 'id', 'group', 'time' and 'treatment' to factors:
my_data$id <- as.factor(my_data$id)
my_data$group <- factor(my_data$group, levels = c("jejunum", "ileum", "caecum", "colon"))
my_data$time <- as.factor(my_data$time)
my_data$treatment <- as.factor(my_data$treatment)
p <- ggplot(my_data, aes(x=time, y=score, fill = factor(group))) +
geom_boxplot(outlier.shape = NA) + scale_fill_brewer(palette="PRGn") + labs(fill="group")
p <- p + geom_point(position=position_jitterdodge(jitter.width = 0.2), aes(fill = factor(group), shape=treatment), alpha=0.7, size = 3)
p
## Warning: Removed 14 rows containing non-finite values (stat_boxplot).
## Warning: Removed 14 rows containing missing values (geom_point).
version
## _
## platform x86_64-w64-mingw32
## arch x86_64
## os mingw32
## system x86_64, mingw32
## status
## major 3
## minor 6.1
## year 2019
## month 07
## day 05
## svn rev 76782
## language R
## version.string R version 3.6.1 (2019-07-05)
## nickname Action of the Toes
sessionInfo()
## R version 3.6.1 (2019-07-05)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19041)
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=English_Netherlands.1252 LC_CTYPE=English_Netherlands.1252
## [3] LC_MONETARY=English_Netherlands.1252 LC_NUMERIC=C
## [5] LC_TIME=English_Netherlands.1252
##
## attached base packages:
## [1] splines parallel stats4 stats graphics grDevices utils
## [8] datasets methods base
##
## other attached packages:
## [1] moments_0.14 gamlss_5.1-7
## [3] gamlss.dist_5.1-7 gamlss.data_5.1-4
## [5] datarium_0.1.0 rstatix_0.6.0
## [7] forcats_0.5.0 stringr_1.4.0
## [9] readr_1.3.1 tidyr_1.1.1
## [11] tibble_3.0.3 tidyverse_1.3.0
## [13] corrplot_0.84 metamicrobiomeR_1.1
## [15] ggvegan_0.1-0 psych_2.0.7
## [17] DESeq2_1.26.0 SummarizedExperiment_1.16.1
## [19] DelayedArray_0.12.3 BiocParallel_1.20.1
## [21] matrixStats_0.56.0 Biobase_2.46.0
## [23] GenomicRanges_1.38.0 GenomeInfoDb_1.22.1
## [25] IRanges_2.20.2 S4Vectors_0.24.4
## [27] BiocGenerics_0.32.0 DT_0.13
## [29] propr_4.2.6 CoDaSeq_0.99.6
## [31] car_3.0-8 carData_3.0-4
## [33] zCompositions_1.3.4 truncnorm_1.0-8
## [35] NADA_1.6-1.1 compositions_2.0-0
## [37] ALDEx2_1.18.0 RColorBrewer_1.1-2
## [39] viridis_0.5.1 viridisLite_0.3.0
## [41] purrr_0.3.4 data.table_1.12.8
## [43] multcompView_0.1-8 multcomp_1.4-13
## [45] TH.data_1.0-10 MASS_7.3-51.4
## [47] mvtnorm_1.1-1 emmeans_1.4.8
## [49] picante_1.8.2 vegan_2.5-6
## [51] permute_0.9-5 Hmisc_4.4-0
## [53] Formula_1.2-3 survival_3.1-12
## [55] lattice_0.20-38 decontam_1.6.0
## [57] pheatmap_1.0.12 apeglm_1.8.0
## [59] sciplot_1.2-0 lme4_1.1-23
## [61] Matrix_1.2-17 nlme_3.1-140
## [63] ggrepel_0.8.2 knitr_1.29
## [65] scales_1.1.1 reshape2_1.4.4
## [67] ape_5.4 dplyr_1.0.1
## [69] plyr_1.8.6 ggpubr_0.4.0
## [71] microbiomeutilities_0.99.02 microbiome_2.1.26
## [73] ggplot2_3.3.2 phyloseq_1.30.0
##
## loaded via a namespace (and not attached):
## [1] utf8_1.1.4 tidyselect_1.1.0 RSQLite_2.2.0
## [4] AnnotationDbi_1.48.0 htmlwidgets_1.5.1 grid_3.6.1
## [7] Rtsne_0.15 munsell_0.5.0 codetools_0.2-16
## [10] statmod_1.4.34 withr_2.2.0 colorspace_1.4-1
## [13] rstudioapi_0.11 robustbase_0.93-6 bayesm_3.1-4
## [16] ggsignif_0.6.0 labeling_0.3 bbmle_1.0.23.1
## [19] GenomeInfoDbData_1.2.2 mnormt_1.5-7 farver_2.0.3
## [22] bit64_4.0.2 rhdf5_2.30.1 coda_0.19-3
## [25] vctrs_0.3.2 generics_0.0.2 xfun_0.16
## [28] R6_2.4.1 locfit_1.5-9.4 bitops_1.0-6
## [31] assertthat_0.2.1 nnet_7.3-12 gtable_0.3.0
## [34] sandwich_2.5-1 rlang_0.4.7 genefilter_1.68.0
## [37] acepack_1.4.1 broom_0.7.0 checkmate_2.0.0
## [40] modelr_0.1.8 yaml_2.2.1 abind_1.4-5
## [43] backports_1.1.8 tensorA_0.36.1 tools_3.6.1
## [46] ellipsis_0.3.1 biomformat_1.14.0 Rcpp_1.0.5
## [49] base64enc_0.1-3 zlibbioc_1.32.0 RCurl_1.98-1.2
## [52] rpart_4.1-15 zoo_1.8-8 haven_2.3.1
## [55] cluster_2.1.0 fs_1.4.1 magrittr_1.5
## [58] openxlsx_4.1.5 reprex_0.3.0 hms_0.5.3
## [61] evaluate_0.14 xtable_1.8-4 XML_3.99-0.3
## [64] rio_0.5.16 emdbook_1.3.12 jpeg_0.1-8.1
## [67] readxl_1.3.1 gridExtra_2.3 compiler_3.6.1
## [70] bdsmatrix_1.3-4 crayon_1.3.4 minqa_1.2.4
## [73] htmltools_0.5.0 mgcv_1.8-28 geneplotter_1.64.0
## [76] lubridate_1.7.9 DBI_1.1.0 dbplyr_1.4.4
## [79] boot_1.3-22 ade4_1.7-15 cli_2.0.2
## [82] gdata_2.18.0 igraph_1.2.5 pkgconfig_2.0.3
## [85] numDeriv_2016.8-1.1 foreign_0.8-71 xml2_1.3.2
## [88] foreach_1.5.0 annotate_1.64.0 multtest_2.42.0
## [91] XVector_0.26.0 estimability_1.3 rvest_0.3.6
## [94] digest_0.6.25 Biostrings_2.54.0 rmarkdown_2.3
## [97] cellranger_1.1.0 htmlTable_2.0.1 curl_4.3
## [100] gtools_3.8.2 nloptr_1.2.2.1 lifecycle_0.2.0
## [103] jsonlite_1.6.1 Rhdf5lib_1.8.0 fansi_0.4.1
## [106] pillar_1.4.6 httr_1.4.2 DEoptimR_1.0-8
## [109] glue_1.4.1 zip_2.0.4 png_0.1-7
## [112] iterators_1.0.12 bit_4.0.4 stringi_1.4.6
## [115] blob_1.2.1 latticeExtra_0.6-29 memoise_1.1.0