This HTML document contains the R-code that was used to perform analyses and create figures for the corresponding manuscript. At the end of the document the R version and package versions can be found. Should one want to run this R project in R-studio, it is advised to use the same R version and package versions as have been used here.
#Warnings and messages have been inactivated for this markdown
library(phyloseq)
library(microbiome)
library(microbiomeutilities)
library(ggplot2)
library(ggpubr)
library(plyr)
library(dplyr)
library(ape)
library(reshape2)
library(scales)
library(knitr)
library(ggrepel)
library(nlme)
library(lme4)
library(sciplot)
library(apeglm)
library(pheatmap)
library(decontam)
library(Hmisc)
library(picante)
library(emmeans)
library(multcomp)
library(multcompView)
library(data.table)
library(purrr)
library(viridis)
library(RColorBrewer)
library(ALDEx2)
library(compositions)
library(zCompositions)
library(CoDaSeq)
library(propr)
library(vegan)
library(DT)
library(DESeq2)
library(psych)
library(car)
library(ggvegan)
library(metamicrobiomeR)
library(corrplot)
library(tidyverse)
library(rstatix)
library(datarium)
library(tidyverse)
library(ggpubr)
# Create Folders as following
# Figures
dir.create("figures")
# Phyloseq objects
dir.create("phyobjects")
# Phyloseq objects
dir.create("output_data")
Loading Illumina sequencing data of library 1-14. Data was run through NGTax 2.0 and SILVA database version 132 was used as reference database.
Create phyloseq object
# create phyloseq object of all libraries
ps <- read_phyloseq(otu.file = "./input_data/Galaxy58-[NG-Tax__porcine_study_all_samples_eightrun_length70bp_11march2020].biom1",
taxonomy.file = NULL,
metadata.file = "./input_data/Metadata_porcine_study_june_2020.csv",
type = "biom")
## Time to complete depends on OTU file size
# create tree object
treefile <- read.tree("./input_data/all_otus_eightrun_length70bp_11march2020.tre")
# merge tree and phyloseq object
ps1.all <- merge_phyloseq(ps,treefile)
print(ps1.all)
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 4157 taxa and 490 samples ]
## sample_data() Sample Data: [ 490 samples by 171 sample variables ]
## tax_table() Taxonomy Table: [ 4157 taxa by 6 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 4157 tips and 4156 internal nodes ]
# export taxonomy table (for Windows)
taxonomy_table_ps1 <- as.data.frame(ps1.all@tax_table)
write.csv(taxonomy_table_ps1, file = "./output_data/taxonomy_table_ps1.csv", fileEncoding = "UTF-16LE")
ntaxa(ps1.all)
## [1] 4157
Remove pattern in taxa names
# remove pattern in taxa names
tax_table(ps1.all)[,colnames(tax_table(ps1.all))] <- gsub(tax_table(ps1.all)[,colnames(tax_table(ps1.all))],pattern="[a-z]__",replacement="")
Export phyloseq object
# this is the raw, unfiltered phyloseq file
saveRDS(ps1.all, "./phyobjects/ps1_all.rds")
ps1.all <- readRDS("./phyobjects/ps1_all.rds")
# create new object to work with
ps1 <- ps1.all
Removing mitochondrial and chloroplast reads
# replace empty fields by "Unmatched_level"
# this is needed because otherwise the microbiome::aggregate_taxa() function will not run
tax_table(ps1)[tax_table(ps1)[,"Phylum"]== "","Phylum"] <- "Unmatched_phylum"
tax_table(ps1)[tax_table(ps1)[,"Class"]== "","Class"] <- "Unmatched_class"
tax_table(ps1)[tax_table(ps1)[,"Order"]== "","Order"] <- "Unmatched_order"
tax_table(ps1)[tax_table(ps1)[,"Family"]== "","Family"] <- "Unmatched_family"
tax_table(ps1)[tax_table(ps1)[,"Genus"]== "","Genus"] <- "Unmatched_genus"
# remove mitochondria and chloroplasts
ps1 <- subset_taxa(ps1, Family != "Mitochondria")
ps1 <- subset_taxa(ps1, Class != "Chloroplast") #No Chloroplasts found in class Chloroplasts
ps1.mitoch <- subset_taxa(ps1.all, Family == "Mitochondria")
ps1.mitoch <- prune_samples(sample_sums(ps1.mitoch)>0,ps1.mitoch)
sort(sample_sums(ps1.mitoch))
## m1338 m3041 m3019 m3073 m2207 m3023ctrl m3012 m3013
## 198 242 268 278 344 379 829 1191
## m1318 m3024 m3037 m3025 m3018 m3040 m2206 m3015
## 2110 2731 4951 5077 5363 9682 20914 21652
## m1323 m2194 m3021
## 23240 33679 72570
# There are however Oxyphotobacteria (Dutch translation: blauwalgen) present in the dataset, which are not removed from the dataset:
ps1.chloro <- subset_taxa(ps1.all, Order == "Chloroplast")
ps1.chloro <- prune_samples(sample_sums(ps1.chloro)>0,ps1.chloro)
sort(sample_sums(ps1.chloro))
## water.blank.9 water.blank.13 m1330 m2190 m2207
## 2 36 237 259 277
## m1398 m1356 m1333 m3041 m3026
## 309 546 607 700 725
## m3073 m3013 m2232 m3036 m3019
## 726 733 1208 1430 2380
## m1318 m3018 m3024 m3025 m3012
## 2832 3166 3246 3417 4065
## m2206 m3015 m3040 m3037 m2194
## 10324 11572 11760 18781 49513
## m1323 m3021
## 60720 81622
print(ps1)
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 4123 taxa and 490 samples ]
## sample_data() Sample Data: [ 490 samples by 171 sample variables ]
## tax_table() Taxonomy Table: [ 4123 taxa by 6 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 4123 tips and 4122 internal nodes ]
ntaxa(ps1.all)-ntaxa(ps1) # 34 ASVs removed which were belonging to mitochondria
## [1] 34
Remove contaminant reads, identified by visual inspection of each ASV individually using correlation plots [DNA_reading] vs rel. ASV abundance. Resulted in 8 ASVs identified as contaminants.
# remove samples that have no reads. Does not do anything as in this dataset there are reads in every sample:
psx <- prune_samples(sample_sums(ps1)>0,ps1)
# add higher taxa names
psx.tax <- as.data.frame(psx@tax_table)
psx.tax$OTU <- rownames(psx.tax)
# import contaminant OTU table
visContam <- read.delim("./input_data/Contaminant_OTU_by_visual_identification.txt",header=T)
psx.tax2 <- psx.tax
rownames(psx.tax2) <- NULL # reset rownames for subset based on index
psx.tax2$contam <- rownames(psx.tax2) %in% visContam$OTU
table(psx.tax2$contam) # works: 8 OTUs T, rest F
##
## FALSE TRUE
## 4115 8
rownames(psx.tax2) <- psx.tax2$OTU # restore OTU as rownames
psx.tax2$reads <- taxa_sums(psx)
# calculate % contaminant reads
contam.otu2 <- subset(psx.tax2, contam == "TRUE")
(sum(contam.otu2$reads) / sum(abundances(psx)))
## [1] 0.000306176
# 0.0306% of all reads
# subset phyloseq to contaminant OTUs
psx.sub <- subset_taxa(psx, psx.tax2$contam)
ps1.decontam <- prune_taxa(!psx.tax2$contam, ps1)
ps1.decontam
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 4115 taxa and 490 samples ]
## sample_data() Sample Data: [ 490 samples by 171 sample variables ]
## tax_table() Taxonomy Table: [ 4115 taxa by 6 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 4115 tips and 4114 internal nodes ]
ps1.decontam <- prune_samples(sample_sums(ps1.decontam)>0, ps1.decontam)
ps1.decontam # 4115 taxa, 489 samples
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 4115 taxa and 489 samples ]
## sample_data() Sample Data: [ 489 samples by 171 sample variables ]
## tax_table() Taxonomy Table: [ 4115 taxa by 6 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 4115 tips and 4114 internal nodes ]
# samples with 0 reads after removing contaminants: 1 (water.blank.10)
# (code in following 2 lines only runs if not running prune_samples command above, used to identify removed samples)
#sample0 <- subset_samples(ps1.decontam, sample_sums(ps1.decontam)==0)
#sample_data(sample0)
ntaxa(ps1)-ntaxa(ps1.decontam)
## [1] 8
# 8 OTUs removed (resulting from the visual identification of contaminants)
sum(abundances(ps1)) - sum(abundances(ps1.decontam))
## [1] 26819
# 26819 reads removed.
# dataset without mitochondrial/chloroplast reads, but with contaminants
saveRDS(ps1, "./phyobjects/ps1.rds")
ps1 <- readRDS("./phyobjects/ps1.rds")
# decontaminated dataset
saveRDS(ps1.decontam, "./phyobjects/ps1.decontam.rds")
ps1.decontam <- readRDS("./phyobjects/ps1.decontam.rds")
# all following objects are without contaminant ASVs:
ps1.unique <- subset_samples(ps1.decontam, Unique == "yes")
ps1.noROM <- subset_samples(ps1.unique, Treatment != "ROM")
ps1.EcN <- subset_samples(ps1.noROM, Treatment != "Y_glucan")
ps1.EcN <- prune_taxa(taxa_sums(otu_table(ps1.EcN))>0, ps1.EcN)
ps1.faeces <- subset_samples(ps1.EcN, Origin == "faeces")
ps1.faeces <- prune_taxa(taxa_sums(otu_table(ps1.faeces))>0, ps1.faeces)
ps1.faeces.pre <- subset_samples(ps1.faeces, Pre_or_post_weaning == "Pre_weaning")
ps1.faeces.pre <- prune_taxa(taxa_sums(otu_table(ps1.faeces.pre))>0, ps1.faeces.pre)
ps1.faeces.post <- subset_samples(ps1.faeces, Pre_or_post_weaning == "Post_weaning")
ps1.faeces.post <- prune_taxa(taxa_sums(otu_table(ps1.faeces.post))>0, ps1.faeces.post)
ps1.digesta <- subset_samples(ps1.EcN, Origin %in% c("jejunum_digesta", "ileum_digesta", "caecum_digesta"))
ps1.digesta <- prune_taxa(taxa_sums(otu_table(ps1.digesta))>0, ps1.digesta)
ps1.jejunum.digesta <- subset_samples(ps1.EcN, Origin %in% c("jejunum_digesta"))
ps1.jejunum.digesta <- prune_taxa(taxa_sums(otu_table(ps1.jejunum.digesta))>0, ps1.jejunum.digesta)
ps1.ileum.digesta <- subset_samples(ps1.EcN, Origin %in% c("ileum_digesta"))
ps1.ileum.digesta <- prune_taxa(taxa_sums(otu_table(ps1.ileum.digesta))>0, ps1.ileum.digesta)
ps1.caecum.digesta <- subset_samples(ps1.EcN, Origin %in% c("caecum_digesta"))
ps1.caecum.digesta <- prune_taxa(taxa_sums(otu_table(ps1.caecum.digesta))>0, ps1.caecum.digesta)
ps1.swabs <- subset_samples(ps1.faeces, Swab %in% c("swab"))
ps1.swabs <- prune_taxa(taxa_sums(otu_table(ps1.swabs))>0, ps1.swabs)
ps1.no.swabs <- subset_samples(ps1.faeces, Swab %in% c("no_swab"))
ps1.no.swabs <- prune_taxa(taxa_sums(otu_table(ps1.no.swabs))>0, ps1.no.swabs)
ps1.enterobacteriaceae <- subset_taxa(ps1.EcN, Family == "Enterobacteriaceae")
print(ps1.faeces) # 287 samples
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 2305 taxa and 286 samples ]
## sample_data() Sample Data: [ 286 samples by 171 sample variables ]
## tax_table() Taxonomy Table: [ 2305 taxa by 6 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 2305 tips and 2304 internal nodes ]
# filter out archaea and place in phyloseq object
ps1.archaea <- subset_taxa(ps1.EcN, Phylum == "Euryarchaeota")
ps1.archaea
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 28 taxa and 420 samples ]
## sample_data() Sample Data: [ 420 samples by 171 sample variables ]
## tax_table() Taxonomy Table: [ 28 taxa by 6 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 28 tips and 27 internal nodes ]
# negative controls - includes contaminant reads (ps1)
ps1.contr <- subset_samples(ps1, Origin %in% c("blank"))
ps1.contr <- prune_taxa(taxa_sums(otu_table(ps1.contr))>0, ps1.contr)
print(ps1.contr)
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 202 taxa and 14 samples ]
## sample_data() Sample Data: [ 14 samples by 171 sample variables ]
## tax_table() Taxonomy Table: [ 202 taxa by 6 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 202 tips and 201 internal nodes ]
# positive controls (mocks) - with contaminant reads (ps1)
ps1.mock <- subset_samples(ps1, Origin %in% c("mock.3","mock.4"))
ps1.mock <- prune_taxa(taxa_sums(otu_table(ps1.mock))>0, ps1.mock)
print(ps1.mock)
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 112 taxa and 26 samples ]
## sample_data() Sample Data: [ 26 samples by 171 sample variables ]
## tax_table() Taxonomy Table: [ 112 taxa by 6 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 112 tips and 111 internal nodes ]
# technical duplicates (PCR) - includes contaminant reads (ps1)
ps1.tech <- subset_samples(ps1, Description %in% c("m1858", "m1858R1","m1858R2","m1858R3", "m2158", "m2158R1", "m2158R2", "m2158R3", "m169", "m169R1", "m169R2", "m169R3", "m169R4", "m169R5", "m169R6", "m924", "m924R1", "m924R2", "m924R3", "m924R4", "m924R5", "m924R6"))
ps1.tech <- prune_taxa(taxa_sums(otu_table(ps1.tech))>0, ps1.tech)
print(ps1.tech)
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 345 taxa and 22 samples ]
## sample_data() Sample Data: [ 22 samples by 171 sample variables ]
## tax_table() Taxonomy Table: [ 345 taxa by 6 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 345 tips and 344 internal nodes ]
# subset mitochondrial sequences
ps1.mit <- subset_taxa(ps1.all, Family == "Mitochondria")
print(ps1.mit)
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 34 taxa and 490 samples ]
## sample_data() Sample Data: [ 490 samples by 171 sample variables ]
## tax_table() Taxonomy Table: [ 34 taxa by 6 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 34 tips and 33 internal nodes ]
# subset chloroplast sequences
#ps1.chloro <- subset_taxa(ps1.all, Class == "Chloroplast") #does not run, since there do not seem to be chloroplast reads present
Saving subsets in compressed formats (RDS).
# with contaminants
saveRDS(ps1.decontam, "./phyobjects/ps1.decontam.rds")
saveRDS(ps1.unique, "./phyobjects/ps1.unique.rds")
saveRDS(ps1.all, "./phyobjects/ps1.all.rds")
saveRDS(ps1.tech, "./phyobjects/ps1.tech.rds")
saveRDS(ps1.contr, "./phyobjects/ps1.contr.rds")
saveRDS(ps1.mock, "./phyobjects/ps1.mock.rds")
saveRDS(ps1.mit, "./phyobjects/ps1.mit.rds")
saveRDS(ps1.faeces, "./phyobjects/ps1.faeces.rds")
saveRDS(ps1.faeces.pre, "./phyobjects/ps1.faeces.pre.rds")
saveRDS(ps1.faeces.post, "./phyobjects/ps1.faeces.post.rds")
saveRDS(ps1.EcN, "./phyobjects/ps1.EcN.rds")
saveRDS(ps1.digesta, "./phyobjects/ps1.digesta.rds")
saveRDS(ps1.jejunum.digesta, "./phyobjects/ps1.jejunum.digesta.rds")
saveRDS(ps1.ileum.digesta, "./phyobjects/ps1.ileum.digesta.rds")
saveRDS(ps1.caecum.digesta, "./phyobjects/ps1.caecum.digesta.rds")
saveRDS(ps1.swabs, "./phyobjects/ps1.swabs.rds")
saveRDS(ps1.no.swabs, "./phyobjects/ps1.no.swabs.rds")
saveRDS(ps1.archaea, "./phyobjects/ps1.archaea.rds")
saveRDS(ps1.enterobacteriaceae, "./phyobjects/ps1.enterobacteriaceae.rds")
# load full datasets from RDS
ps1.all <- readRDS("./phyobjects/ps1.all.rds")
Exploring the stats of the datasets: # reads, ASVs, nr of genera, etc.
sum(sample_sums(ps1.all)) # raw phyloseq
## [1] 87799119
sum(sample_sums(ps1)) # excl mitrochondria/chloroplast, incl contaminants
## [1] 87593421
sum(sample_sums(ps1.decontam)) # excl mitrochondria/chloroplast, excl contaminants
## [1] 87566602
print(ps1)
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 4123 taxa and 490 samples ]
## sample_data() Sample Data: [ 490 samples by 171 sample variables ]
## tax_table() Taxonomy Table: [ 4123 taxa by 6 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 4123 tips and 4122 internal nodes ]
ntaxa(aggregate_taxa(ps1,"Genus"))
## [1] 429
colnames(tax_table(ps1))
## [1] "Domain" "Phylum" "Class" "Order" "Family" "Genus"
ps1.faeces <- readRDS("./phyobjects/ps1.faeces.rds") #all faecal samples
# this line is used to make sure the order of days is chronological:
ps1.faeces@sam_data$Day_of_study_factor <- factor(ps1.faeces@sam_data$Day_of_study_factor, levels = c("Day_4", "Day_8", "Day_14", "Day_26", "Day_27", "Day_35", "Day_43", "Day_44", "Day_59", "Day_69", "Day_70"))
# this line is used to make sure pre-weaning is followed by post-weaning:
ps1.faeces@sam_data$Pre_or_post_weaning <- factor(ps1.faeces@sam_data$Pre_or_post_weaning, levels = c("Pre-weaning", "Post-weaning"))
ps1.digesta <- readRDS("./phyobjects/ps1.digesta.rds") #all digesta samples
# this line is used to reorder segments according order luminal content passages through the GI tract of the animals:
ps1.digesta@sam_data$Origin <- factor(ps1.digesta@sam_data$Origin, levels = c("jejunum_digesta", "ileum_digesta", "caecum_digesta"))
print(ps1.digesta) #contains 1608 taxa in 134 samples
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 1626 taxa and 134 samples ]
## sample_data() Sample Data: [ 134 samples by 171 sample variables ]
## tax_table() Taxonomy Table: [ 1626 taxa by 6 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 1626 tips and 1625 internal nodes ]
ps1.EcN <- readRDS("./phyobjects/ps1.EcN.rds") #all faecal and digestal samples of the current study
# this line is used to combine faeces and digesta in an orderly fashion for use in the alpha-diversity plots:
ps1.EcN@sam_data$Origin_Day <- factor(ps1.EcN@sam_data$Origin_Day, levels = c("faeces_Day_4", "faeces_Day_8", "faeces_Day_14", "faeces_Day_26", "faeces_Day_35", "faeces_Day_43", "faeces_Day_59", "faeces_Day_69", "jejunum_digesta_Day_27", "jejunum_digesta_Day_44", "jejunum_digesta_Day_70", "ileum_digesta_Day_27", "ileum_digesta_Day_44", "ileum_digesta_Day_70", "caecum_digesta_Day_27", "caecum_digesta_Day_44", "caecum_digesta_Day_70"))
ps1.proteobacteria <- readRDS("./phyobjects/ps1.EcN.rds")
ps1.proteobacteria <- subset_taxa(ps1.proteobacteria, Phylum == "Proteobacteria")
ps1.proteobacteria <- subset_samples(ps1.proteobacteria, Day_of_study_factor %in% c("Day_4", "Day_8", "Day_14", "Day_26", "Day_27"))
ps1.proteobacteria <- subset_samples(ps1.proteobacteria, Origin %in% c("faeces"))
# this line is used to make sure the order of days is chronological:
ps1.proteobacteria@sam_data$Day_of_study_factor <- factor(ps1.proteobacteria@sam_data$Day_of_study_factor, levels = c("Day_4", "Day_8", "Day_14", "Day_26", "Day_27", "Day_35", "Day_43", "Day_44", "Day_59", "Day_69", "Day_70"))
# this line is used to make sure pre-weaning is followed by post-weaning:
ps1.proteobacteria@sam_data$Pre_or_post_weaning <- factor(ps1.proteobacteria@sam_data$Pre_or_post_weaning, levels = c("Pre-weaning", "Post-weaning"))
ps1.proteobacteria@sam_data$Origin_Day <- factor(ps1.proteobacteria@sam_data$Origin_Day, levels = c("faeces_Day_4", "faeces_Day_8", "faeces_Day_14", "faeces_Day_26", "faeces_Day_35", "faeces_Day_43", "faeces_Day_59", "faeces_Day_69", "jejunum_digesta_Day_27", "jejunum_digesta_Day_44", "jejunum_digesta_Day_70", "ileum_digesta_Day_27", "ileum_digesta_Day_44", "ileum_digesta_Day_70", "caecum_digesta_Day_27", "caecum_digesta_Day_44", "caecum_digesta_Day_70"))
p <- plot_richness(ps1.faeces, "Day_of_study_factor", color = "Treatment", measures = "Shannon")
## Warning in estimate_richness(physeq, split = TRUE, measures = measures): The data you have provided does not have
## any singletons. This is highly suspicious. Results of richness
## estimates (for example) are probably unreliable, or wrong, if you have already
## trimmed low-abundance taxa from the data.
##
## We recommended that you find the un-trimmed data and retry.
p <- p + geom_boxplot(outlier.shape = NA, aes(fill = "Day_of_study")) + scale_fill_manual(values = c("white", "#5F7FC7", "orange","#DA5724", "#508578"))
p <- p + geom_point(position=position_jitterdodge(), alpha=1, size=1.5) #+ theme_bw()
p$layers <- p$layers[-1]
print (p) #this plot is created for illustrative purposes, and is not saved or used in the manuscript
p <- plot_richness(ps1.digesta, "Day_of_study_factor", color = "Treatment", measures = "Shannon")
## Warning in estimate_richness(physeq, split = TRUE, measures = measures): The data you have provided does not have
## any singletons. This is highly suspicious. Results of richness
## estimates (for example) are probably unreliable, or wrong, if you have already
## trimmed low-abundance taxa from the data.
##
## We recommended that you find the un-trimmed data and retry.
p <- p + geom_boxplot(outlier.shape = NA, aes(fill = "Day_of_study")) + scale_fill_manual(values = c("white", "#5F7FC7", "orange","#DA5724", "#508578"))
p <- p + geom_point(position=position_jitterdodge(), alpha=1, size=1.5)
p <- p + facet_wrap("Origin")
p$layers <- p$layers[-1]
print (p) #this plot is created for illustrative purposes, and is not saved or used in the manuscript
p <- plot_richness(ps1.EcN, "Origin_Day", color = "Treatment", measures = "InvSimpson")
## Warning in estimate_richness(physeq, split = TRUE, measures = measures): The data you have provided does not have
## any singletons. This is highly suspicious. Results of richness
## estimates (for example) are probably unreliable, or wrong, if you have already
## trimmed low-abundance taxa from the data.
##
## We recommended that you find the un-trimmed data and retry.
p <- p + geom_boxplot(outlier.shape = NA, aes(fill = "Origin_Day")) + scale_fill_manual(values = c("white", "#5F7FC7", "orange","#DA5724", "#508578"))
p <- p + geom_point(position=position_jitterdodge(), alpha = .9, size=2)
p$layers <- p$layers[-1]
#The final figure can be found under panel A in Figure 2 of the manuscript
print(p)
ggsave("./figures/alpha_diversity_faeces_and_digesta_inv_simpson4_no_theme_h7_w11_size2_original.pdf", height = 7, width = 11)
# following figures combine the faeces and digesta alpha diversities
p <- plot_richness(ps1.EcN, "Origin_Day", color = "Treatment", measures = "Shannon")
## Warning in estimate_richness(physeq, split = TRUE, measures = measures): The data you have provided does not have
## any singletons. This is highly suspicious. Results of richness
## estimates (for example) are probably unreliable, or wrong, if you have already
## trimmed low-abundance taxa from the data.
##
## We recommended that you find the un-trimmed data and retry.
p <- p + geom_boxplot(outlier.shape = NA, aes(fill = "Origin_Day")) + scale_fill_manual(values = c("white", "#5F7FC7", "orange","#DA5724", "#508578"))
p <- p + geom_point(position=position_jitterdodge(), alpha = .9, size=2)
p$layers <- p$layers[-1]
#The final figure can be found under panel B in Figure 2 of the manuscript
print(p)
ggsave("./figures/alpha_diversity_faeces_and_digesta_shannon4_no_theme_h7_w11_size2_original.pdf", height = 7, width = 11)
p <- plot_richness(ps1.proteobacteria, "Origin_Day", color = "Treatment", measures = "Observed") + geom_boxplot(outlier.shape = NA, aes(fill = "Origin_Day")) + scale_fill_manual(values = c("white", "#5F7FC7", "orange","#DA5724", "#508578")) + geom_point(shape = 21, aes(alpha = sqrt(EcN_NGS_relative)), position=position_jitterdodge(seed = 456), size=0.5, stroke = 2) + geom_point(position=position_jitterdodge(seed = 456), size = 0.5)
## Warning in estimate_richness(physeq, split = TRUE, measures = measures): The data you have provided does not have
## any singletons. This is highly suspicious. Results of richness
## estimates (for example) are probably unreliable, or wrong, if you have already
## trimmed low-abundance taxa from the data.
##
## We recommended that you find the un-trimmed data and retry.
#p <- p + geom_point(position=position_jitterdodge(), alpha = .9, size=2)
p$layers <- p$layers[-1]
#The final figure can be found under panel B in Figure 2 of the manuscript
print(p)
ggsave("./figures/alpha_diversity_faeces_observed_proteobacteria.pdf", height = 7, width = 11)
Loading data for LME (Linear Mixed-Effects models)
ps1 <- readRDS("./phyobjects/ps1.EcN.rds")
ps1@sam_data$Day_of_study_factor <- factor(ps1@sam_data$Day_of_study_factor, levels = c("Day_4", "Day_8", "Day_14", "Day_26", "Day_27", "Day_35", "Day_43", "Day_44", "Day_59", "Day_69", "Day_70"))
ps1@sam_data$Pre_or_post_weaning <- factor(ps1@sam_data$Pre_or_post_weaning, levels = c("Pre_weaning", "Post_weaning"))
ps1.fcs <- subset_samples(ps1, Origin %in% c("faeces"))
ps1.fcs.pre <- subset_samples(ps1.fcs, Pre_or_post_weaning %in% c("Pre_weaning"))
ps1.fcs.pre.proteobacteria <- subset_taxa(ps1.fcs.pre, Phylum == "Proteobacteria")
ps1.fcs <- prune_taxa(taxa_sums(otu_table(ps1.fcs))>0, ps1.fcs)
ps1.fcs.pre <- prune_taxa(taxa_sums(otu_table(ps1.fcs.pre))>0, ps1.fcs.pre)
ps1.fcs.pre.proteobacteria <- prune_taxa(taxa_sums(otu_table(ps1.fcs.pre.proteobacteria))>0, ps1.fcs.pre.proteobacteria)
ps1.fcs.r <- microbiome::transform(ps1.fcs, "compositional")
ps1.fcs.pre.proteobacteria.r <- microbiome::transform(ps1.fcs.pre.proteobacteria, "compositional")
ps1.fcs.pre.r <- microbiome::transform(ps1.fcs.pre, "compositional")
# input for alpha diversity
ps1.otu <- as.data.frame(ps1.fcs.r@otu_table)
ps1.tree <- ps1.fcs.r@phy_tree
ps1.pre.otu <- as.data.frame(ps1.fcs.pre.r@otu_table)
ps1.pre.tree <- ps1.fcs.pre.r@phy_tree
ps1.pre.proteobacteria.otu <- as.data.frame(ps1.fcs.pre.proteobacteria.r@otu_table)
ps1.pre.proteobacteria.tree <- ps1.fcs.pre.proteobacteria.r@phy_tree
# calculate Shannon and InvSimpson alpha diversity and add metadata
ad.df <- data.frame("Description" = colnames(ps1.otu))
ad.df$Shannon <- estimate_richness(ps1.fcs)$Shannon
## Warning in estimate_richness(ps1.fcs): The data you have provided does not have
## any singletons. This is highly suspicious. Results of richness
## estimates (for example) are probably unreliable, or wrong, if you have already
## trimmed low-abundance taxa from the data.
##
## We recommended that you find the un-trimmed data and retry.
ad.df$InvSimpson <- estimate_richness(ps1.fcs)$InvSimpson
## Warning in estimate_richness(ps1.fcs): The data you have provided does not have
## any singletons. This is highly suspicious. Results of richness
## estimates (for example) are probably unreliable, or wrong, if you have already
## trimmed low-abundance taxa from the data.
##
## We recommended that you find the un-trimmed data and retry.
ad.df <- merge(meta(ps1.fcs.r), ad.df, by = "Description", all.x=T)
ad.df <- ad.df
# mixed model: random term and variance structure
lme.pd1 <- lme(Shannon ~ Day_of_study_factor * Treatment,
data = ad.df, method = "REML",
random = ~ 1 | Ear_tag)
lme.pd2 <- lme(InvSimpson ~ Day_of_study_factor * Treatment,
data = ad.df, method = "REML",
random = ~ 1 | Ear_tag)
# residual plots to visually inspect normality assumption
plot(resid(lme.pd1, method= "pearson")~ad.df$Day_of_study_factor); abline(0,0)
hist(resid(lme.pd1, method= "pearson"), breaks=30, col="grey")
qqnorm(lme.pd1, ∼ranef (.))
qqnorm(lme.pd1)
plot(resid(lme.pd2, method= "pearson")~ad.df$Day_of_study_factor); abline(0,0)
hist(resid(lme.pd2, method= "pearson"), breaks=30, col="grey")
qqnorm(lme.pd2, ∼ranef (.))
qqnorm(lme.pd2)
# model output
aov.pd.ls1 <- anova(lme.pd1) #Shannon diversity
aov.pd.ls1 #no significant difference on all faecal samples between treatment groups
## numDF denDF F-value p-value
## (Intercept) 1 224 33750.89 <.0001
## Day_of_study_factor 7 224 27.89 <.0001
## Treatment 1 46 0.06 0.8071
## Day_of_study_factor:Treatment 7 224 2.50 0.0173
aov.pd.ls2 <- anova(lme.pd2) #InvSimpson diversity
aov.pd.ls2 #no significant difference on all faecal samples between treatment groups
## numDF denDF F-value p-value
## (Intercept) 1 224 1474.2569 <.0001
## Day_of_study_factor 7 224 12.5348 <.0001
## Treatment 1 46 0.4290 0.5157
## Day_of_study_factor:Treatment 7 224 2.0856 0.0461
# test differences between timepoints - Shannon diversity
res.aov2 <- aov(Shannon ~ Day_of_study_factor, data = ad.df)
summary(res.aov2)
## Df Sum Sq Mean Sq F value Pr(>F)
## Day_of_study_factor 7 22.42 3.203 26.96 <2e-16 ***
## Residuals 278 33.02 0.119
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(res.aov2, which = "Day_of_study_factor")
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Shannon ~ Day_of_study_factor, data = ad.df)
##
## $Day_of_study_factor
## diff lwr upr p adj
## Day_8-Day_4 0.31976503 0.10489282 0.53463724 0.0002185
## Day_14-Day_4 0.40313810 0.18712597 0.61915022 0.0000008
## Day_26-Day_4 0.55341430 0.33740218 0.76942643 0.0000000
## Day_35-Day_4 0.90604172 0.66580728 1.14627615 0.0000000
## Day_43-Day_4 0.77912256 0.53888812 1.01935699 0.0000000
## Day_59-Day_4 0.60190367 0.29802848 0.90577887 0.0000001
## Day_69-Day_4 0.69011902 0.38624382 0.99399421 0.0000000
## Day_14-Day_8 0.08337307 -0.13263906 0.29938519 0.9374044
## Day_26-Day_8 0.23364927 0.01763715 0.44966140 0.0236782
## Day_35-Day_8 0.58627669 0.34604225 0.82651112 0.0000000
## Day_43-Day_8 0.45935753 0.21912309 0.69959196 0.0000004
## Day_59-Day_8 0.28213864 -0.02173655 0.58601384 0.0905096
## Day_69-Day_8 0.37035398 0.06647879 0.67422918 0.0057767
## Day_26-Day_14 0.15027621 -0.06686985 0.36742226 0.4087047
## Day_35-Day_14 0.50290362 0.26164908 0.74415816 0.0000000
## Day_43-Day_14 0.37598446 0.13472992 0.61723900 0.0000843
## Day_59-Day_14 0.19876558 -0.10591672 0.50344788 0.4888469
## Day_69-Day_14 0.28698092 -0.01770138 0.59166322 0.0813335
## Day_35-Day_26 0.35262741 0.11137287 0.59388196 0.0003100
## Day_43-Day_26 0.22570825 -0.01554629 0.46696280 0.0855660
## Day_59-Day_26 0.04848937 -0.25619293 0.35317167 0.9997143
## Day_69-Day_26 0.13670471 -0.16797759 0.44138701 0.8699735
## Day_43-Day_35 -0.12691916 -0.39008280 0.13624448 0.8212386
## Day_59-Day_35 -0.30413805 -0.62644636 0.01817027 0.0802139
## Day_69-Day_35 -0.21592270 -0.53823102 0.10638561 0.4528516
## Day_59-Day_43 -0.17721889 -0.49952720 0.14508943 0.7006721
## Day_69-Day_43 -0.08900354 -0.41131186 0.23330477 0.9903988
## Day_69-Day_59 0.08821534 -0.28395424 0.46038493 0.9962323
# test differences between timepoints - InvSimpson diversity
res.aov2 <- aov(InvSimpson ~ Day_of_study_factor, data = ad.df)
summary(res.aov2)
## Df Sum Sq Mean Sq F value Pr(>F)
## Day_of_study_factor 7 8370 1195.8 12.23 1.27e-13 ***
## Residuals 278 27190 97.8
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(res.aov2, which = "Day_of_study_factor")
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = InvSimpson ~ Day_of_study_factor, data = ad.df)
##
## $Day_of_study_factor
## diff lwr upr p adj
## Day_8-Day_4 3.33470622 -2.83085321 9.500265648 0.7181115
## Day_14-Day_4 6.36693246 0.16866427 12.565200659 0.0392907
## Day_26-Day_4 10.76677344 4.56850525 16.965041637 0.0000063
## Day_35-Day_4 17.96301526 11.06971026 24.856320261 0.0000000
## Day_43-Day_4 12.31179973 5.41849473 19.205104730 0.0000030
## Day_59-Day_4 8.70841903 -0.01099873 17.427836796 0.0505517
## Day_69-Day_4 8.73454908 0.01513131 17.453966839 0.0492494
## Day_14-Day_8 3.03222624 -3.16604195 9.230494438 0.8101714
## Day_26-Day_8 7.43206722 1.23379903 13.630335417 0.0071567
## Day_35-Day_8 14.62830904 7.73500404 21.521614040 0.0000000
## Day_43-Day_8 8.97709351 2.08378851 15.870398510 0.0022395
## Day_59-Day_8 5.37371281 -3.34570495 14.093130576 0.5642845
## Day_69-Day_8 5.39984286 -3.31957491 14.119260619 0.5579884
## Day_26-Day_14 4.39984098 -1.83096428 10.630646239 0.3814915
## Day_35-Day_14 11.59608280 4.67350673 18.518658864 0.0000160
## Day_43-Day_14 5.94486727 -0.97770880 12.867443333 0.1521748
## Day_59-Day_14 2.34148657 -6.40109038 11.084063518 0.9920134
## Day_69-Day_14 2.36761661 -6.37496033 11.110193561 0.9914599
## Day_35-Day_26 7.19624182 0.27366575 14.118817885 0.0351338
## Day_43-Day_26 1.54502629 -5.37754978 8.467602354 0.9974194
## Day_59-Day_26 -2.05835441 -10.80093136 6.684222539 0.9963876
## Day_69-Day_26 -2.03222437 -10.77480131 6.710352582 0.9966654
## Day_43-Day_35 -5.65121553 -13.20245282 1.900021759 0.3052277
## Day_59-Day_35 -9.25459623 -18.50293537 -0.006257085 0.0497062
## Day_69-Day_35 -9.22846618 -18.47680533 0.019872958 0.0509429
## Day_59-Day_43 -3.60338070 -12.85171984 5.644958445 0.9343107
## Day_69-Day_43 -3.57725065 -12.82558980 5.671088488 0.9367079
## Day_69-Day_59 0.02613004 -10.65293214 10.705192230 1.0000000
# calculate Shannon and InvSimpson alpha diversity and add metadata
ad.df.pre <- data.frame("Description" = colnames(ps1.pre.otu))
ad.df.pre$Shannon <- estimate_richness(ps1.fcs.pre)$Shannon
## Warning in estimate_richness(ps1.fcs.pre): The data you have provided does not have
## any singletons. This is highly suspicious. Results of richness
## estimates (for example) are probably unreliable, or wrong, if you have already
## trimmed low-abundance taxa from the data.
##
## We recommended that you find the un-trimmed data and retry.
ad.df.pre$InvSimpson <- estimate_richness(ps1.fcs.pre)$InvSimpson
## Warning in estimate_richness(ps1.fcs.pre): The data you have provided does not have
## any singletons. This is highly suspicious. Results of richness
## estimates (for example) are probably unreliable, or wrong, if you have already
## trimmed low-abundance taxa from the data.
##
## We recommended that you find the un-trimmed data and retry.
ad.df.pre <- merge(meta(ps1.fcs.pre.r), ad.df.pre, by = "Description", all.x=T)
ad.df.pre <- ad.df.pre
# mixed model: random term and variance structure
lme.pre.pd1 <- lme(Shannon ~ Day_of_study_factor * Treatment,
data = ad.df.pre, method = "REML",
random = ~ 1 | Ear_tag)
lme.pre.pd2 <- lme(InvSimpson ~ Day_of_study_factor * Treatment,
data = ad.df.pre, method = "REML",
random = ~ 1 | Ear_tag)
# residual plots to visually inspect normality assumption
plot(resid(lme.pre.pd1, method= "pearson")~ad.df.pre$Day_of_study_factor); abline(0,0)
hist(resid(lme.pre.pd1, method= "pearson"), breaks=30, col="grey")
qqnorm(lme.pre.pd1, ∼ranef (.))
qqnorm(lme.pre.pd1)
plot(resid(lme.pre.pd2, method= "pearson")~ad.df.pre$Day_of_study_factor); abline(0,0)
hist(resid(lme.pre.pd2, method= "pearson"), breaks=30, col="grey")
qqnorm(lme.pre.pd2, ∼ranef (.))
qqnorm(lme.pre.pd2)
# model output
aov.pd.ls1 <- anova(lme.pre.pd1)
aov.pd.ls1
## numDF denDF F-value p-value
## (Intercept) 1 136 19714.580 <.0001
## Day_of_study_factor 3 136 24.048 <.0001
## Treatment 1 46 1.739 0.1938
## Day_of_study_factor:Treatment 3 136 2.317 0.0784
aov.pd.ls2 <- anova(lme.pre.pd2)
aov.pd.ls2
## numDF denDF F-value p-value
## (Intercept) 1 136 1222.0842 <.0001
## Day_of_study_factor 3 136 17.9269 <.0001
## Treatment 1 46 2.8908 0.0958
## Day_of_study_factor:Treatment 3 136 2.0146 0.1148
# calculate Shannon and InvSimpson alpha diversity and add metadata
ad.df.pre.proteobacteria <- data.frame("Description" = colnames(ps1.pre.proteobacteria.otu))
ad.df.pre.proteobacteria$Observed <- estimate_richness(ps1.fcs.pre.proteobacteria, measures = "Observed")$Observed
## Warning in estimate_richness(ps1.fcs.pre.proteobacteria, measures = "Observed"): The data you have provided does not have
## any singletons. This is highly suspicious. Results of richness
## estimates (for example) are probably unreliable, or wrong, if you have already
## trimmed low-abundance taxa from the data.
##
## We recommended that you find the un-trimmed data and retry.
ad.df.pre.proteobacteria$Shannon <- estimate_richness(ps1.fcs.pre.proteobacteria, measures = "Shannon")$Shannon
## Warning in estimate_richness(ps1.fcs.pre.proteobacteria, measures = "Shannon"): The data you have provided does not have
## any singletons. This is highly suspicious. Results of richness
## estimates (for example) are probably unreliable, or wrong, if you have already
## trimmed low-abundance taxa from the data.
##
## We recommended that you find the un-trimmed data and retry.
ad.df.pre.proteobacteria <- merge(meta(ps1.fcs.pre.proteobacteria.r), ad.df.pre.proteobacteria, by = "Description", all.x=T)
ad.df.pre.proteobacteria <- ad.df.pre.proteobacteria
# mixed model: random term and variance structure
lme.pre.proteobacteria.pd1 <- lme(Observed ~ Day_of_study_factor * Treatment,
data = ad.df.pre.proteobacteria, method = "REML",
random = ~ 1 | Ear_tag)
lme.pre.proteobacteria.pd2 <- lme(Shannon ~ Day_of_study_factor * Treatment,
data = ad.df.pre.proteobacteria, method = "REML",
random = ~ 1 | Ear_tag)
# residual plots to visually inspect normality assumption
plot(resid(lme.pre.proteobacteria.pd1, method= "pearson")~ad.df.pre.proteobacteria$Day_of_study_factor); abline(0,0)
hist(resid(lme.pre.proteobacteria.pd1, method= "pearson"), breaks=30, col="grey")
qqnorm(lme.pre.proteobacteria.pd1, ∼ranef (.))
qqnorm(lme.pre.proteobacteria.pd1)
plot(resid(lme.pre.proteobacteria.pd2, method= "pearson")~ad.df.pre.proteobacteria$Day_of_study_factor); abline(0,0)
hist(resid(lme.pre.proteobacteria.pd2, method= "pearson"), breaks=30, col="grey")
qqnorm(lme.pre.proteobacteria.pd2, ∼ranef (.))
qqnorm(lme.pre.proteobacteria.pd2)
# model output
aov.pd.ls1 <- anova(lme.pre.proteobacteria.pd1)
aov.pd.ls1
## numDF denDF F-value p-value
## (Intercept) 1 136 353.9852 <.0001
## Day_of_study_factor 3 136 12.0719 <.0001
## Treatment 1 46 4.8152 0.0333
## Day_of_study_factor:Treatment 3 136 2.0137 0.1149
aov.pd.ls2 <- anova(lme.pre.proteobacteria.pd2)
aov.pd.ls2
## numDF denDF F-value p-value
## (Intercept) 1 136 612.6292 <.0001
## Day_of_study_factor 3 136 3.1742 0.0263
## Treatment 1 46 0.1200 0.7306
## Day_of_study_factor:Treatment 3 136 0.5927 0.6208
Input data Principal Coordinates Analysis plot
ps1 <- readRDS("./phyobjects/ps1.EcN.rds")
ps1@sam_data$Day_of_study_factor <- factor(ps1@sam_data$Day_of_study_factor, levels = c("Day_4", "Day_8", "Day_14", "Day_26", "Day_27", "Day_35", "Day_43", "Day_44", "Day_59", "Day_69", "Day_70"))
ps1@sam_data$Pre_or_post_weaning <- factor(ps1@sam_data$Pre_or_post_weaning, levels = c("Pre_weaning", "Post_weaning"))
ps1 <- subset_samples(ps1, Origin %in% c("faeces"))
ps1 <- prune_taxa(taxa_sums(ps1) > 0, ps1)
ps1 <- microbiome::transform(ps1, "compositional")
Ordination plot based on Weighted Unifrac
set.seed(49275)
ordu.wt.uni = ordinate(ps1, "PCoA", "bray")
wt.unifrac <- plot_ordination(ps1, ordu.wt.uni, color="Day_of_study_factor", shape="Treatment")
wt.unifrac <- wt.unifrac + scale_color_viridis(discrete = TRUE, option = "C")+ scale_fill_viridis(discrete = TRUE) +
ggtitle("Ordination plot - PCoA, Weighted UniFrac faecal samples") + geom_point(size = 3) + scale_shape_manual(values=c(15,17))
wt.unifrac <- wt.unifrac +
stat_ellipse(type = "norm", linetype = 5) +
theme_bw()
print(wt.unifrac)
pdf(file = "./figures/pcoa_bray_faecal_samples_shape_15_17_shape_treatment.pdf", height = 10, width = 15)
wt.unifrac
dev.off() #The final figure can be found as Figure 4 of the manuscript
## png
## 2
ps1 <- readRDS("./phyobjects/ps1.EcN.rds")
ps1@sam_data$Day_of_study_factor <- factor(ps1@sam_data$Day_of_study_factor, levels = c("Day_4", "Day_8", "Day_14", "Day_26", "Day_27", "Day_35", "Day_43", "Day_44", "Day_59", "Day_69", "Day_70"))
ps1@sam_data$Pre_or_post_weaning <- factor(ps1@sam_data$Pre_or_post_weaning, levels = c("Pre_weaning", "Post_weaning"))
# subsetting
ps1.fcs <- subset_samples(ps1, Origin %in% c("faeces"))
ps1.fcs.pre <- subset_samples(ps1.fcs, Pre_or_post_weaning %in% c("Pre_weaning"))
ps1.fcs.post <- subset_samples(ps1.fcs, Pre_or_post_weaning %in% c("Post_weaning"))
ps1.fcs.r <- microbiome::transform(ps1.fcs, "compositional")
ps1.fcs.pre.r <- microbiome::transform(ps1.fcs.pre, "compositional")
ps1.fcs.post.r <- microbiome::transform(ps1.fcs.post, "compositional")
# Conversions of data for Adonis tests
fcs.otu <- abundances(ps1.fcs.r)
fcs.meta <- meta(ps1.fcs.r)
fcs.otu.pre <- abundances(ps1.fcs.pre.r)
fcs.meta.pre <- meta(ps1.fcs.pre.r)
fcs.otu.post <- abundances(ps1.fcs.post.r)
fcs.meta.post <- meta(ps1.fcs.post.r)
PERMANOVA significance test for group-level differences
set.seed(343)
permanova <- adonis(t(fcs.otu) ~ Treatment + Day_of_study_factor,
data = fcs.meta, permutations=9999, method = "bray")
print(as.data.frame(permanova$aov.tab)) #statistically significant differences in beta-diversity between treatment groups when taking into account all faecal timepoints (p = 0.0149)
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Treatment 1 0.4799172 0.4799172 2.056624 0.004823338 0.0149
## Day_of_study_factor 7 34.3805735 4.9115105 21.047653 0.345536953 0.0001
## Residuals 277 64.6384869 0.2333519 NA 0.649639709 NA
## Total 285 99.4989776 NA NA 1.000000000 NA
set.seed(456)
permanova.pre <- adonis(t(fcs.otu.pre) ~ Treatment + Day_of_study_factor,
data = fcs.meta.pre, permutations=9999, method = "bray")
print(as.data.frame(permanova.pre$aov.tab)) #statistically significant differences in beta-diversity between treatment groups when taking into account pre-weaning faecal timepoints (p = 0.005)
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Treatment 1 0.6112093 0.6112093 2.445152 0.01005824 5e-03
## Day_of_study_factor 3 13.9117835 4.6372612 18.551438 0.22893638 1e-04
## Residuals 185 46.2440344 0.2499678 NA 0.76100538 NA
## Total 189 60.7670272 NA NA 1.00000000 NA
set.seed(456)
permanova.post <- adonis(t(fcs.otu.post) ~ Treatment + Day_of_study_factor,
data = fcs.meta.post, permutations=9999, method = "bray")
print(as.data.frame(permanova.post$aov.tab)) #no statistically significant differences in beta-diversity between treatment groups when taking into account post-weaning faecal timepoints (p=0.0756)
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Treatment 1 0.302365 0.302365 1.532204 0.01389173 0.0756
## Day_of_study_factor 3 3.505529 1.168510 5.921303 0.16105654 0.0001
## Residuals 91 17.957937 0.197340 NA 0.82505173 NA
## Total 95 21.765831 NA NA 1.00000000 NA
# all faecal samples - dispersion between treatment groups
dist <- vegdist(t(fcs.otu), method="bray")
anova(betadisper(dist, fcs.meta$Treatment)) #statistical significant differences in dispersion between treatment groups, although really low sum of squares
## Analysis of Variance Table
##
## Response: Distances
## Df Sum Sq Mean Sq F value Pr(>F)
## Groups 1 0.00877 0.0087713 4.6162 0.03252 *
## Residuals 284 0.53963 0.0019001
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
dispersion <- betadisper(dist, group= fcs.meta$Treatment)
plot(dispersion, hull=FALSE, ellipse = TRUE) #plot shows there are not really differences in dispersion
# dispersion pre- vs post-weaning
anova(betadisper(dist, fcs.meta$Pre_or_post_weaning)) #statistical significant differences in dispersion between pre- and post-weaning samples, and high sum of squares
## Analysis of Variance Table
##
## Response: Distances
## Df Sum Sq Mean Sq F value Pr(>F)
## Groups 1 0.55784 0.55784 137.97 < 2.2e-16 ***
## Residuals 284 1.14825 0.00404
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
dispersion_weaning <- betadisper(dist, group= fcs.meta$Pre_or_post_weaning)
plot(dispersion_weaning, hull=FALSE, ellipse = TRUE)
# dispersion in each time-point
anova(betadisper(dist, fcs.meta$Day_of_study_factor)) #statistical significant differences in dispersion between time points
## Analysis of Variance Table
##
## Response: Distances
## Df Sum Sq Mean Sq F value Pr(>F)
## Groups 7 0.37805 0.054007 9.122 3.804e-10 ***
## Residuals 278 1.64588 0.005920
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
dispersion_day <- betadisper(dist, group = fcs.meta$Day_of_study_factor)
plot(dispersion_day, hull=FALSE, ellipse = TRUE)
# pre-weaning faecal samples - dispersion between treatment groups
dist.pre <- vegdist(t(fcs.otu.pre), method="bray")
anova(betadisper(dist.pre, fcs.meta.pre$Treatment)) #statistical significant differences in dispersion between groups in pre-weaning faecal samples, although really low sum of squares
## Analysis of Variance Table
##
## Response: Distances
## Df Sum Sq Mean Sq F value Pr(>F)
## Groups 1 0.01692 0.0169151 6.4247 0.01207 *
## Residuals 188 0.49497 0.0026328
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
dispersion.pre <- betadisper(dist.pre, group= fcs.meta.pre$Treatment)
plot(dispersion.pre, hull=FALSE, ellipse = TRUE)
# pre-weaning faecal samples - dispersion in each time-point
anova(betadisper(dist.pre, fcs.meta.pre$Day_of_study_factor)) #no statistical significant differences in dispersion between timepoints in pre-weaning faecal samples
## Analysis of Variance Table
##
## Response: Distances
## Df Sum Sq Mean Sq F value Pr(>F)
## Groups 3 0.02694 0.0089792 1.4967 0.2169
## Residuals 186 1.11587 0.0059993
dispersion.pre_day <- betadisper(dist.pre, group = fcs.meta.pre$Day_of_study_factor)
plot(dispersion.pre_day, hull=FALSE, ellipse = TRUE)
ps1.EcN <- readRDS("./phyobjects/ps1.EcN.rds")
ps1.EcN@sam_data$Day_of_study_factor <- factor(ps1.EcN@sam_data$Day_of_study_factor, levels = c("Day_4", "Day_8", "Day_14", "Day_26", "Day_27", "Day_35", "Day_43", "Day_44", "Day_59", "Day_69", "Day_70"))
ps1.EcN.g <- tax_glom(ps1.EcN, "Genus")
ps1.EcN.g.r <- microbiome::transform(ps1.EcN.g, "compositional")
ps1.EcN.g.r <- subset_samples(ps1.EcN.g.r, Origin %in% c("faeces"))
ps1.EcN.g.r <- subset_samples(ps1.EcN.g.r, Pre_or_post_weaning %in% c("Pre_weaning"))
ps1.EcN.g.r <- filter_taxa(ps1.EcN.g.r, function(x) sum(x > 0.001) > (0.30*length(x)), TRUE)
ps1.EcN.g.r #60 genera passed through the prevalence filter
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 60 taxa and 190 samples ]
## sample_data() Sample Data: [ 190 samples by 171 sample variables ]
## tax_table() Taxonomy Table: [ 60 taxa by 6 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 60 tips and 59 internal nodes ]
# tax table with OTU column and best_hit
EcN.tax <- as.data.frame(tax_table(ps1.EcN.g.r))
EcN.tax$OTU <- rownames(EcN.tax)
tax_table(ps1.EcN.g.r) <- tax_table(as.matrix(EcN.tax))
ps1.EcN.g.bh <- format_to_besthit(ps1.EcN.g.r)
EcN.tax.bh <- as.data.frame(tax_table(ps1.EcN.g.bh))
colnames(EcN.tax.bh)[7] <- "OTU"
EcN.g.met <- meta(ps1.EcN.g.r)
EcN.g1 <- EcN.g.met[,c("Description","Ear_tag", "EcN_NGS_relative", "department_pre_weaning",
"Treatment","Day_of_study_factor")]
# OTU table as dataframe:
EcN.g2 <- as.data.frame(t(otu_table(ps1.EcN.g.r)))
EcN.g2$Description <- rownames(EcN.g2)
# merge files
EcN.g3 <- merge(EcN.g1, EcN.g2, by = "Description")
colnames(EcN.g3)[7:ncol(EcN.g3)] <- paste("k__", sep = "", colnames(EcN.g3)[7:ncol(EcN.g3)])
Abundance testing on pre-weaning faecal samples
# Results, warnings and messages have been inactivated for this markdown, as otherwise it will use too much space in the HTML output file.
tc.treatment <- taxa.compare(taxtab = EcN.g3, propmed.rel = "gamlss",
transform = "none", comvar = "Treatment",
adjustvar = c("Day_of_study_factor", "department_pre_weaning"),
personid = "Ear_tag", longitudinal = "yes",
p.adjust.method = "fdr")
tc.EcN05 <- subset(tc.treatment, pval.adjust.TreatmentEcN < .05)
write.csv(tc.treatment, file = "./output_data/tc.treatment_pre-w_genera_department_filter0.001_prev0.3.csv")
tc.EcN05$id <- droplevels(as.factor(tc.EcN05$id))
EcN3.m <- reshape2::melt(EcN.g3)
EcN3.max <- ddply(EcN3.m, .(variable), summarise, max = max(value))
EcN3.max1 <- EcN3.max
EcN3.max1$variable <- droplevels(EcN3.max1$variable)
# subset
EcN.g4 <- subset(EcN3.m, variable %in% tc.EcN05$id &
variable %in% EcN3.max1$variable)
# create vector of genus names (OTU codes) to include in plots
select.gen.EcN <- levels(droplevels(EcN.g4$variable)) # 6 genera
select.gen.EcN <- sub("k__", "", select.gen.EcN) # remove "k__"
select.gen.EcN
tc.EcN.sub <- subset(tc.EcN05, id %in% EcN3.max1$variable)
# remove "k__" in id names
tc.EcN.sub$id <- as.factor(sub("k__", "", tc.EcN.sub$id))
# show tax names of output genera
EcN.g4.tax <- subset(EcN.tax.bh, OTU %in% select.gen.EcN) #significant genera
# create vectors of significant genera from comparisons
select.gen.EcN2 <- unique(c(as.character(select.gen.EcN)))
# filter for genera from first output series of GAMLSS tests (select.gen.EcN2).
# object 1: metadata
EcN.ls.met <- meta(ps1.EcN.g.r)
EcN.ls1 <- EcN.ls.met[,c("Description","Ear_tag", "EcN_NGS_relative", "department_pre_weaning", "Treatment","Day_of_study_factor")]
# object 2: OTU table
EcN.ls2 <- as.data.frame(t(otu_table(ps1.EcN.g.r)))
EcN.ls2$Description <- rownames(EcN.ls2)
# melt EcN.ls2
EcN.ls2.m <- reshape2::melt(EcN.ls2)
## Using Description as id variables
colnames(EcN.ls2.m) <- c("Description", "OTU", "abund")
# merge EcN.ls2, EcN.tax and EcN.ls1
EcN.ls3a <- base::merge(EcN.ls2.m, EcN.tax, by = "OTU")
EcN.ls3 <- base::merge(EcN.ls1, EcN.ls3a, by = "Description")
# subset to signif genera
EcN.ls3.max <- ddply(EcN.ls3, .(OTU), summarise, max = max(abund))
EcN.ls3.max$OTU <- droplevels(EcN.ls3.max$OTU)
EcN.ls3.s <- subset(EcN.ls3, OTU %in% select.gen.EcN2 & OTU %in%
EcN.ls3.max$OTU)
EcN.ls3.s$OTU <- droplevels(EcN.ls3.s$OTU)
theme4 <- theme_classic() +
theme(panel.grid.major = element_line(colour = "grey80"),
panel.spacing = unit(.5,"lines"),
panel.border = element_rect(color = "black", fill = NA, size = .5),
strip.background = element_blank(),
strip.placement = "outside",
text = element_text(size=15),
axis.text.x = element_text(hjust = .5, vjust = .5),
plot.title = element_text(size = 12))
labs_abd <- as_labeller(c(
"faeces" = "Faeces", "ileum_digesta" = "Ileum digesta", "jejunum_digesta" = " Jejunum digesta"))
# for-loop
sig.plot <- list()
for(i in 1:nlevels(EcN.ls3.s$OTU)){
a = levels(EcN.ls3.s$OTU)[i]
B = EcN.ls3.s[EcN.ls3.s$OTU == a,]
p = ggplot(B, aes(x = Day_of_study_factor, y = abund, color = Treatment)) +
geom_boxplot(outlier.size = 0) +
geom_point(aes(size = sqrt(EcN_NGS_relative), alpha = sqrt(EcN_NGS_relative)), position = position_jitterdodge(seed = 456)) +
geom_point(size = 1, aes(fill = Treatment), colour= 'black', position = position_jitterdodge(seed = 456)) +
labs(x="time (d)", y="rel. abundance") + scale_y_log10() +
ggtitle(paste(B$Order,B$Family,B$Genus,B$OTU)) + theme4
sig.plot[[i]] = p
}
# print plots
pdf("./figures/Plots_genus_sig_trimmed_compare_treatments_faeces_pre-w_padj.05_department_filter0.001_0.3_adj_y_axis_alpha_by_sqrt_ecn_rela_log10_y_axis_size_by_sqrt_ecn_rela_h8_w12.pdf", height = 8, width = 12)
for (i in 1:nlevels(EcN.ls3.s$OTU)) {
print(sig.plot[[i]])
}
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 123 rows containing non-finite values (stat_boxplot).
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 127 rows containing non-finite values (stat_boxplot).
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 132 rows containing non-finite values (stat_boxplot).
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 129 rows containing non-finite values (stat_boxplot).
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 112 rows containing non-finite values (stat_boxplot).
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 88 rows containing non-finite values (stat_boxplot).
dev.off() #Resulting figures can be found under Supplementary Figure S11 of the manuscript
## png
## 2
ps1.EcN <- readRDS("./phyobjects/ps1.EcN.rds")
ps1.EcN@sam_data$Day_of_study_factor <- factor(ps1.EcN@sam_data$Day_of_study_factor, levels = c("Day_4", "Day_8", "Day_14", "Day_26", "Day_27", "Day_35", "Day_43", "Day_44", "Day_59", "Day_69", "Day_70"))
ps1.EcN.g <- tax_glom(ps1.EcN, "Genus")
ps1.EcN.g.r <- microbiome::transform(ps1.EcN.g, "compositional")
ps1.EcN.g.r <- subset_samples(ps1.EcN.g.r, Origin %in% c("faeces"))
ps1.EcN.g.r <- subset_samples(ps1.EcN.g.r, Pre_or_post_weaning %in% c("Post_weaning"))
ps1.EcN.g.r <- filter_taxa(ps1.EcN.g.r, function(x) sum(x > 0.001) > (0.30*length(x)), TRUE)
ps1.EcN.g.r # 87 genera passed through the prevalence filter
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 87 taxa and 96 samples ]
## sample_data() Sample Data: [ 96 samples by 171 sample variables ]
## tax_table() Taxonomy Table: [ 87 taxa by 6 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 87 tips and 86 internal nodes ]
# tax table with OTU column and best_hit
EcN.tax <- as.data.frame(tax_table(ps1.EcN.g.r))
EcN.tax$OTU <- rownames(EcN.tax)
tax_table(ps1.EcN.g.r) <- tax_table(as.matrix(EcN.tax))
ps1.EcN.g.bh <- format_to_besthit(ps1.EcN.g.r)
EcN.tax.bh <- as.data.frame(tax_table(ps1.EcN.g.bh))
colnames(EcN.tax.bh)[7] <- "OTU"
EcN.g.met <- meta(ps1.EcN.g.r)
EcN.g1 <- EcN.g.met[,c("Description","Ear_tag", "EcN_NGS_relative", "department_pre_weaning",
"Treatment","Day_of_study_factor")]
# OTU table as dataframe:
EcN.g2 <- as.data.frame(t(otu_table(ps1.EcN.g.r)))
EcN.g2$Description <- rownames(EcN.g2)
# merge files
EcN.g3 <- merge(EcN.g1, EcN.g2, by = "Description")
colnames(EcN.g3)[7:ncol(EcN.g3)] <- paste("k__", sep = "", colnames(EcN.g3)[7:ncol(EcN.g3)])
Abundance testing on post-weaning faecal samples
# Results, warnings and messages have been inactivated for this markdown, as otherwise it will use too much space in the HTML output file.
tc.treatment <- taxa.compare(taxtab = EcN.g3, propmed.rel = "gamlss",
transform = "none", comvar = "Treatment",
adjustvar = c("Day_of_study_factor", "department_pre_weaning"),
personid = "Ear_tag", longitudinal = "yes",
p.adjust.method = "fdr")
tc.EcN05 <- subset(tc.treatment, pval.adjust.TreatmentEcN < .05)
write.csv(tc.treatment, file = "./output_data/tc.treatment_post-w_genera_department_filter0.001_prev0.3.csv")
tc.EcN05$id <- droplevels(as.factor(tc.EcN05$id))
EcN3.m <- reshape2::melt(EcN.g3)
EcN3.max <- ddply(EcN3.m, .(variable), summarise, max = max(value))
EcN3.max1 <- EcN3.max
EcN3.max1$variable <- droplevels(EcN3.max1$variable)
# subset
EcN.g4 <- subset(EcN3.m, variable %in% tc.EcN05$id &
variable %in% EcN3.max1$variable)
# create vector of genus names (OTU codes) to include in plots
select.gen.EcN <- levels(droplevels(EcN.g4$variable))
select.gen.EcN <- sub("k__", "", select.gen.EcN) # remove "k__"
select.gen.EcN
tc.EcN.sub <- subset(tc.EcN05, id %in% EcN3.max1$variable)
# remove "k__" in id names
tc.EcN.sub$id <- as.factor(sub("k__", "", tc.EcN.sub$id))
# show tax names of output genera
EcN.g4.tax <- subset(EcN.tax.bh, OTU %in% select.gen.EcN) #significant genera
# create vectors of significant genera from comparisons
select.gen.EcN2 <- unique(c(as.character(select.gen.EcN)))
# filter for genera from first output series of GAMLSS tests (select.gen.EcN2).
# object 1: metadata
EcN.ls.met <- meta(ps1.EcN.g.r)
EcN.ls1 <- EcN.ls.met[,c("Description","Ear_tag", "EcN_NGS_relative", "department_pre_weaning", "Treatment","Day_of_study_factor")]
# object 2: OTU table
EcN.ls2 <- as.data.frame(t(otu_table(ps1.EcN.g.r)))
EcN.ls2$Description <- rownames(EcN.ls2)
# melt EcN.ls2
EcN.ls2.m <- reshape2::melt(EcN.ls2)
## Using Description as id variables
colnames(EcN.ls2.m) <- c("Description", "OTU", "abund")
# merge EcN.ls2, EcN.tax and EcN.ls1
EcN.ls3a <- base::merge(EcN.ls2.m, EcN.tax, by = "OTU")
EcN.ls3 <- base::merge(EcN.ls1, EcN.ls3a, by = "Description")
# subset to signif genera
EcN.ls3.max <- ddply(EcN.ls3, .(OTU), summarise, max = max(abund))
EcN.ls3.max$OTU <- droplevels(EcN.ls3.max$OTU)
EcN.ls3.s <- subset(EcN.ls3, OTU %in% select.gen.EcN2 & OTU %in%
EcN.ls3.max$OTU)
EcN.ls3.s$OTU <- droplevels(EcN.ls3.s$OTU)
theme4 <- theme_classic() +
theme(panel.grid.major = element_line(colour = "grey80"),
panel.spacing = unit(.5,"lines"),
panel.border = element_rect(color = "black", fill = NA, size = .5),
strip.background = element_blank(),
strip.placement = "outside",
text = element_text(size=15),
axis.text.x = element_text(hjust = .5, vjust = .5),
plot.title = element_text(size = 12))
labs_abd <- as_labeller(c(
"faeces" = "Faeces", "ileum_digesta" = "Ileum digesta", "jejunum_digesta" = " Jejunum digesta"))
# for-loop
sig.plot <- list()
for(i in 1:nlevels(EcN.ls3.s$OTU)){
a = levels(EcN.ls3.s$OTU)[i]
B = EcN.ls3.s[EcN.ls3.s$OTU == a,]
p = ggplot(B, aes(x = Day_of_study_factor, y = abund, color = Treatment)) +
geom_boxplot(outlier.size = 0) +
geom_point(aes(size = sqrt(EcN_NGS_relative), alpha = sqrt(EcN_NGS_relative)), position = position_jitterdodge(seed = 456)) +
geom_point(size = 1, aes(fill = Treatment), colour= 'black', position = position_jitterdodge(seed = 456)) +
labs(x="time (d)", y="rel. abundance") + scale_y_log10() +
ggtitle(paste(B$Order,B$Family,B$Genus,B$OTU)) + theme4
sig.plot[[i]] = p
}
# print plots
pdf("./figures/Plots_genus_sig_trimmed_compare_treatments_faeces_post-w_padj.05_department_filter0.001_0.3_adj_y_axis_alpha_by_sqrt_ecn_rela_log10_y_axis_size_by_sqrt_ecn_rela_h8_w12.pdf", height = 8, width = 12)
for (i in 1:nlevels(EcN.ls3.s$OTU)) {
print(sig.plot[[i]])
}
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 44 rows containing non-finite values (stat_boxplot).
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 57 rows containing non-finite values (stat_boxplot).
dev.off() #Resulting figures can be found under Supplementary Figure S12 of the manuscript
## png
## 2
ps1.EcN <- readRDS("./phyobjects/ps1.EcN.rds")
ps1.EcN@sam_data$Day_of_study_factor <- factor(ps1.EcN@sam_data$Day_of_study_factor, levels = c("Day_4", "Day_8", "Day_14", "Day_26", "Day_27", "Day_35", "Day_43", "Day_44", "Day_59", "Day_69", "Day_70"))
ps1.EcN.g <- tax_glom(ps1.EcN, "Genus")
ps1.EcN.g.r <- microbiome::transform(ps1.EcN.g, "compositional")
ps1.EcN.g.r <- subset_samples(ps1.EcN.g.r, Origin %in% c("faeces"))
ps1.EcN.g.r <- filter_taxa(ps1.EcN.g.r, function(x) sum(x > 0.001) > (0.30*length(x)), TRUE)
ps1.EcN.g.r # 71 genera passed through the prevalence filter
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 71 taxa and 286 samples ]
## sample_data() Sample Data: [ 286 samples by 171 sample variables ]
## tax_table() Taxonomy Table: [ 71 taxa by 6 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 71 tips and 70 internal nodes ]
# tax table with OTU column and best_hit
EcN.tax <- as.data.frame(tax_table(ps1.EcN.g.r))
EcN.tax$OTU <- rownames(EcN.tax)
tax_table(ps1.EcN.g.r) <- tax_table(as.matrix(EcN.tax))
ps1.EcN.g.bh <- format_to_besthit(ps1.EcN.g.r)
EcN.tax.bh <- as.data.frame(tax_table(ps1.EcN.g.bh))
colnames(EcN.tax.bh)[7] <- "OTU"
EcN.g.met <- meta(ps1.EcN.g.r)
EcN.g1 <- EcN.g.met[,c("Description","Ear_tag", "EcN_NGS_relative", "department_pre_weaning",
"Treatment","Day_of_study_factor")]
# OTU table as dataframe:
EcN.g2 <- as.data.frame(t(otu_table(ps1.EcN.g.r)))
EcN.g2$Description <- rownames(EcN.g2)
# merge files
EcN.g3 <- merge(EcN.g1, EcN.g2, by = "Description")
colnames(EcN.g3)[7:ncol(EcN.g3)] <- paste("k__", sep = "", colnames(EcN.g3)[7:ncol(EcN.g3)])
Abundance testing on all faecal samples
# Results, warnings and messages have been inactivated for this markdown, as otherwise it will use too much space in the HTML output file.
tc.treatment <- taxa.compare(taxtab = EcN.g3, propmed.rel = "gamlss",
transform = "none", comvar = "Treatment",
adjustvar = c("Day_of_study_factor", "department_pre_weaning"),
personid = "Ear_tag", longitudinal = "yes",
p.adjust.method = "fdr")
tc.EcN05 <- subset(tc.treatment, pval.adjust.TreatmentEcN < .05)
write.csv(tc.treatment, file = "./output_data/tc.treatment_all_samples_genera_department_filter0.001_prev0.3.csv")
tc.EcN05$id <- droplevels(as.factor(tc.EcN05$id))
EcN3.m <- reshape2::melt(EcN.g3)
EcN3.max <- ddply(EcN3.m, .(variable), summarise, max = max(value))
EcN3.max1 <- EcN3.max
EcN3.max1$variable <- droplevels(EcN3.max1$variable)
# subset
EcN.g4 <- subset(EcN3.m, variable %in% tc.EcN05$id &
variable %in% EcN3.max1$variable)
# create vector of genus names (OTU codes) to include in plots
select.gen.EcN <- levels(droplevels(EcN.g4$variable))
select.gen.EcN <- sub("k__", "", select.gen.EcN) # remove "k__"
select.gen.EcN
tc.EcN.sub <- subset(tc.EcN05, id %in% EcN3.max1$variable)
# remove "k__" in id names
tc.EcN.sub$id <- as.factor(sub("k__", "", tc.EcN.sub$id))
# show tax names of output genera
EcN.g4.tax <- subset(EcN.tax.bh, OTU %in% select.gen.EcN) #significant genera
# create vectors of significant genera from comparisons
select.gen.EcN2 <- unique(c(as.character(select.gen.EcN)))
# filter for genera from first output series of GAMLSS tests (select.gen.EcN2).
# object 1: metadata
EcN.ls.met <- meta(ps1.EcN.g.r)
EcN.ls1 <- EcN.ls.met[,c("Description","Ear_tag", "EcN_NGS_relative", "department_pre_weaning", "Treatment","Day_of_study_factor")]
# object 2: OTU table
EcN.ls2 <- as.data.frame(t(otu_table(ps1.EcN.g.r)))
EcN.ls2$Description <- rownames(EcN.ls2)
# melt EcN.ls2
EcN.ls2.m <- reshape2::melt(EcN.ls2)
## Using Description as id variables
colnames(EcN.ls2.m) <- c("Description", "OTU", "abund")
# merge EcN.ls2, EcN.tax and EcN.ls1
EcN.ls3a <- base::merge(EcN.ls2.m, EcN.tax, by = "OTU")
EcN.ls3 <- base::merge(EcN.ls1, EcN.ls3a, by = "Description")
# subset to signif genera
EcN.ls3.max <- ddply(EcN.ls3, .(OTU), summarise, max = max(abund))
EcN.ls3.max$OTU <- droplevels(EcN.ls3.max$OTU)
EcN.ls3.s <- subset(EcN.ls3, OTU %in% select.gen.EcN2 & OTU %in%
EcN.ls3.max$OTU)
EcN.ls3.s$OTU <- droplevels(EcN.ls3.s$OTU)
theme4 <- theme_classic() +
theme(panel.grid.major = element_line(colour = "grey80"),
panel.spacing = unit(.5,"lines"),
panel.border = element_rect(color = "black", fill = NA, size = .5),
strip.background = element_blank(),
strip.placement = "outside",
text = element_text(size=15),
axis.text.x = element_text(hjust = .5, vjust = .5),
plot.title = element_text(size = 12))
labs_abd <- as_labeller(c(
"faeces" = "Faeces", "ileum_digesta" = "Ileum digesta", "jejunum_digesta" = " Jejunum digesta"))
# for-loop
sig.plot <- list()
for(i in 1:nlevels(EcN.ls3.s$OTU)){
a = levels(EcN.ls3.s$OTU)[i]
B = EcN.ls3.s[EcN.ls3.s$OTU == a,]
p = ggplot(B, aes(x = Day_of_study_factor, y = abund, color = Treatment)) +
geom_boxplot(outlier.size = 0) +
geom_point(aes(size = sqrt(EcN_NGS_relative), alpha = sqrt(EcN_NGS_relative)), position = position_jitterdodge(seed = 456)) +
geom_point(size = 1, aes(fill = Treatment), colour= 'black', position = position_jitterdodge(seed = 456)) +
labs(x="time (d)", y="rel. abundance") + scale_y_log10() +
ggtitle(paste(B$Order,B$Family,B$Genus,B$OTU)) + theme4
sig.plot[[i]] = p
}
# print plots
pdf("./figures/Plots_genus_sig_trim_comp_treatm_fcs_all_samp_padj.05_departm_filt0.001_0.3_adj_y_axis_alpha_by_sqrt_ecn_rela_log10_y_axis_size_by_sqrt_ecn_rela_h8_w12.pdf", height = 8, width = 12)
for (i in 1:nlevels(EcN.ls3.s$OTU)) {
print(sig.plot[[i]])
}
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 177 rows containing non-finite values (stat_boxplot).
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 164 rows containing non-finite values (stat_boxplot).
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 107 rows containing non-finite values (stat_boxplot).
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 194 rows containing non-finite values (stat_boxplot).
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 184 rows containing non-finite values (stat_boxplot).
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 193 rows containing non-finite values (stat_boxplot).
dev.off() #Resulting figures can be found under Figure 4 of the manuscript
## png
## 2
ps1 <- readRDS("./phyobjects/ps1.EcN.rds")
taxa_names(physeq = ps1) <- interaction(rownames(ps1@tax_table),ps1@tax_table[,"Genus"],ps1@tax_table[,"Family"])
ps1 <- microbiome::transform(ps1, "compositional")
ps1 <- subset_taxa(ps1, Phylum == "Proteobacteria")
ps1@sam_data$Day_of_study_factor <- factor(ps1@sam_data$Day_of_study_factor, levels = c("Day_4", "Day_8", "Day_14", "Day_26", "Day_27", "Day_35", "Day_43", "Day_44", "Day_59", "Day_69", "Day_70"))
ps1@sam_data$Pre_or_post_weaning <- factor(ps1@sam_data$Pre_or_post_weaning, levels = c("Pre_weaning", "Post_weaning"))
ps1 <- subset_samples(ps1, Origin %in% c("faeces"))
ps1 <- subset_samples(ps1, Pre_or_post_weaning %in% c("Pre_weaning"))
metadata <- ps1@sam_data
treatment1 <- rownames(metadata[metadata$Treatment != "Control"])
treatment2 <- rownames(metadata[metadata$Treatment == "Control"])
otu <- as.data.frame(ps1@otu_table)
taxa_group1 <- otu[,treatment1]
taxa_group2 <- otu[,treatment2]
significant <- list()
for (i in rownames(taxa_group1)){
x = wilcox.test(x= as.numeric(taxa_group1[i,]),y = as.numeric(taxa_group2[i,]),paired = F,exact = F)
significant[[i]] <- x$p.value
}
my_taxa <- do.call(rbind.data.frame, significant)
rownames(my_taxa) <- rownames(taxa_group1)
my_taxa[,2] <- 1
my_taxa[,3] <- "whatev"
colnames(my_taxa) <- c("pval","corrected","taxon")
my_taxa$taxon <- rownames(my_taxa)
my_taxa$corrected <- p.adjust(my_taxa[,1], method = "BH")
significantly_different_taxa <- my_taxa[which(my_taxa$corrected < 1),]
significantly_different_taxa2 <- rownames(significantly_different_taxa)
ps1.significantly_different_taxa <- ps1
ps1.significantly_different_taxa@otu_table <- ps1@otu_table[significantly_different_taxa2,]
ps1.significantly_different_taxa@tax_table <- ps1@tax_table[significantly_different_taxa2,]
p <- plot_heatmap(ps1.significantly_different_taxa, method = "PCA", sample.order = "Ear_tag", title = "Heatmap_of_pre_weaning_proteobacteria", taxa.order = "Genus", sample.label="Ear_tag", low="#FFFFCC", high="#000033", na.value="white") + facet_grid(~Day_of_study_factor + Treatment, scales = "free")
p
## Warning: Transformation introduced infinite values in discrete y-axis
pdf(file = "./figures/Heatmap_proteobacteria_otus_control_vs_ecn_no_testing_OTUgenusFamily_label_xlabel_eartag_order_eartag_treatment_grid.pdf", height = 15, width = 20)
p
## Warning: Transformation introduced infinite values in discrete y-axis
dev.off() #Resulting figure can be found under Supplementary Figure S9 of the manuscript
## png
## 2
pseq_not_annotated1 <- read_phyloseq(otu.file = "./input_data/Galaxy76-[NG-Tax__porcine_study_all_samples_eightrun_length120bp_no_annotation_0.005threshold_5may2020].biom1", taxonomy.file = TRUE, metadata.file = "./input_data/Metadata_porcine_study_june_2020.csv", "biom")
## Time to complete depends on OTU file size
## Warning in read_biom2phyloseq(biom.file = otu.file, taxonomy.file, metadata.file): Taxonomy is available both in the biom file and in
## the separate taxonomy.file. Using the biom file version here.
## To change this original taxonomy, do it explicitly in your code
## by modifying tax_table(physeq).
pseq_not_annotated1 <- prune_taxa(taxa_sums(pseq_not_annotated1) > 0, pseq_not_annotated1)
pseq_not_annotated1 # 110975 taxa in 489 samples
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 110975 taxa and 489 samples ]
## sample_data() Sample Data: [ 489 samples by 171 sample variables ]
## tax_table() Taxonomy Table: [ 110975 taxa by 6 taxonomic ranks ]
saveRDS(pseq_not_annotated1, "./phyobjects/pseq_not_annotated1.rds")
# convert to relative abundance
pseq_not_annotated1 <- microbiome::transform(pseq_not_annotated1, "compositional")
tax_table(pseq_not_annotated1) <- cbind(tax_table(pseq_not_annotated1),
rownames(tax_table(pseq_not_annotated1)))
colnames(tax_table(pseq_not_annotated1)) <-
c("Domain", "Phylum", "Class", "Order", "Family", "Genus", "OTUID")
colnames(tax_table(pseq_not_annotated1))
## [1] "Domain" "Phylum" "Class" "Order" "Family" "Genus" "OTUID"
ps1.1OTU <- subset_taxa(pseq_not_annotated1, OTUID == "2671525405")
ps1.1OTU
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 1 taxa and 489 samples ]
## sample_data() Sample Data: [ 489 samples by 171 sample variables ]
## tax_table() Taxonomy Table: [ 1 taxa by 7 taxonomic ranks ]
saveRDS(ps1.1OTU, "./phyobjects/ps1.1OTU.rds")
otu <- as.data.frame(ps1.1OTU@otu_table)
samples2 <- otu
samples3 <- t(samples2)
write.csv(samples3, file = "./EcN_abundance_NGS_dataset/ecn_otu_table_relative2.csv", fileEncoding = "UTF-16LE")
pseq_not_annotated1 <- readRDS("./phyobjects/pseq_not_annotated1.rds")
# select spiked samples
ps1 <- subset_samples(pseq_not_annotated1, Description %in% c("spk3", "spk4", "spk5", "spk6", "spk7"))
# convert to relative abundance
ps1 <- microbiome::transform(ps1, "compositional")
#remove taxa that are not present in the spiked samples
ps1 <- prune_taxa(taxa_sums(ps1) > 0, ps1)
print(ps1)
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 3821 taxa and 5 samples ]
## sample_data() Sample Data: [ 5 samples by 171 sample variables ]
## tax_table() Taxonomy Table: [ 3821 taxa by 6 taxonomic ranks ]
ps1 <- filter_taxa(ps1, function(x) sum(x > 0.0125) > (0.005*length(x)), TRUE)
tax_table(ps1) <- cbind(tax_table(ps1),
rownames(tax_table(ps1)))
colnames(tax_table(ps1)) <-
c("Domain", "Phylum", "Class", "Order", "Family", "Genus", "OTUID")
getPalette = colorRampPalette(brewer.pal(10, "Paired"))
#barplot with single OTU
p <- plot_bar(ps1, fill = "OTUID")
p <- p + facet_wrap(~Day_of_study_factor, scales = "free_x", nrow = 1)
p <- p + theme(text = element_text(size = 20), panel.background = element_rect(fill = "white",
colour = "lightgrey",
size = 0.5, linetype = "solid"),)
p <- p + scale_fill_manual(values = getPalette(10))
p
pdf(file = "./figures/Barplot_EcN_presence_in_spiked_samples.pdf", height = 10, width = 10)
p
dev.off() #Resulting figure can be found under Supplementary Figure S2 of the manuscript
## png
## 2
ps1.1OTU <- readRDS("./phyobjects/ps1.1OTU.rds")
ps1.1OTU@sam_data$Day_of_study_factor <- factor(ps1.1OTU@sam_data$Day_of_study_factor, levels = c("Day_4", "Day_8", "Day_14", "Day_26", "Day_27", "Day_35", "Day_43", "Day_44", "Day_59", "Day_69", "Day_70"))
ps1.1OTU <- subset_samples(ps1.1OTU, Unique %in% c("yes"))
ps1.1OTU <- subset_samples(ps1.1OTU, Origin %in% c("faeces"))
#subset to exclusively contain pre-weaning faecal samples, as post-weaning faecal samples do not contain the EcN-specific ASV:
ps1.1OTU <- subset_samples(ps1.1OTU, Day_of_study_factor %in% c("Day_4", "Day_8", "Day_14", "Day_26"))
ps1.1OTU <- subset_samples(ps1.1OTU, Treatment %in% c("EcN"))
getPalette = colorRampPalette(brewer.pal(24, "Paired"))
## Warning in brewer.pal(24, "Paired"): n too large, allowed maximum for palette Paired is 12
## Returning the palette you asked for with that many colors
#barplot with single OTU
p <- plot_bar(ps1.1OTU, x="Ear_tag", fill = "Ear_tag")
p <- p + facet_wrap(~Day_of_study_factor, scales = "free_x", nrow = 1)
p <- p + theme(text = element_text(size = 20), panel.background = element_rect(fill = "white",
colour = "lightgrey",
size = 0.5, linetype = "solid"),)
p <- p + scale_fill_manual(values = getPalette(24))
p
pdf(file = "./figures/Barplot_EcN_presence_over_time_in_faeces_pre-weaning_ear_tag.pdf", height = 15, width = 25)
p
dev.off() #Resulting figure can be found under Figure 2 of the manuscript
## png
## 2
ps1.1OTU <- readRDS("./phyobjects/ps1.1OTU.rds")
ps1.1OTU@sam_data$Origin_Day <- factor(ps1.1OTU@sam_data$Origin_Day, levels = c("faeces_Day_4", "faeces_Day_8", "faeces_Day_14", "faeces_Day_26", "faeces_Day_35", "faeces_Day_43", "faeces_Day_59", "faeces_Day_69", "jejunum_digesta_Day_27", "ileum_digesta_Day_27", "caecum_digesta_Day_27", "jejunum_digesta_Day_44", "ileum_digesta_Day_44", "caecum_digesta_Day_44", "jejunum_digesta_Day_70", "ileum_digesta_Day_70", "caecum_digesta_Day_70"))
ps1.1OTU <- subset_samples(ps1.1OTU, Unique %in% c("yes"))
ps1.1OTU <- subset_samples(ps1.1OTU, Origin %in% c("jejunum_digesta", "ileum_digesta", "caecum_digesta"))
#subset to exclusively contain pre-weaning faecal samples, as post-weaning faecal samples do not contain the EcN-specific ASV:
ps1.1OTU <- subset_samples(ps1.1OTU, Treatment %in% c("EcN"))
getPalette = colorRampPalette(brewer.pal(24, "Paired"))
## Warning in brewer.pal(24, "Paired"): n too large, allowed maximum for palette Paired is 12
## Returning the palette you asked for with that many colors
#barplot with single OTU
p <- plot_bar(ps1.1OTU, x="Ear_tag", fill = "Origin_Day")
p <- p + facet_wrap(~Origin_Day, scales = "free_x", nrow = 1)
p <- p + theme(text = element_text(size = 20), panel.background = element_rect(fill = "white",
colour = "lightgrey",
size = 0.5, linetype = "solid"),)
p <- p + scale_fill_manual(values = getPalette(24))
p
pdf(file = "./figures/Barplot_EcN_presence_over_time_in_digesta_ear_tag.pdf", height = 15, width = 25)
p
dev.off() #Resulting figure can be found under Supplementary Figure S6 of the manuscript
## png
## 2
ps1.1genus <- readRDS("./phyobjects/ps1.EcN.rds")
# convert to relative abundance
ps1.1genus <- microbiome::transform(ps1.1genus, "compositional")
ps1.1genus@sam_data$Day_of_study_factor <- factor(ps1.1genus@sam_data$Day_of_study_factor, levels = c("Day_4", "Day_8", "Day_14", "Day_26", "Day_27", "Day_35", "Day_43", "Day_44", "Day_59", "Day_69", "Day_70"))
colnames(tax_table(ps1.1genus))
## [1] "Domain" "Phylum" "Class" "Order" "Family" "Genus"
tax_table(ps1.1genus) <- cbind(tax_table(ps1.1genus),
rownames(tax_table(ps1.1genus)))
colnames(tax_table(ps1.1genus)) <-
c("Domain", "Phylum", "Class", "Order", "Family", "Genus", "OTUID")
ps1.1genus <- subset_taxa(ps1.1genus, Genus == "Treponema_2")
ps1.1genus <- subset_samples(ps1.1genus, Unique %in% c("yes"))
ps1.1genus <- subset_samples(ps1.1genus, Origin %in% c("faeces"))
ps1.1genus <- prune_taxa(taxa_sums(otu_table(ps1.1genus))>0, ps1.1genus)
getPalette = colorRampPalette(brewer.pal(ntaxa(ps1.1genus), "Paired"))
## Warning in brewer.pal(ntaxa(ps1.1genus), "Paired"): n too large, allowed maximum for palette Paired is 12
## Returning the palette you asked for with that many colors
#barplot with single OTU
p <- plot_bar(ps1.1genus, x="Ear_tag", fill = "OTU")
p <- p + facet_wrap(~Day_of_study_factor + Origin + Treatment, scales = "free_x", nrow = 1)
p <- p + theme(text = element_text(size = 20), panel.background = element_rect(fill = "white",
colour = "lightgrey",
size = 0.5, linetype = "solid"),)
p <- p + scale_fill_manual(values = getPalette(ntaxa(ps1.1genus)))
p
pdf(file = "./figures/Barplot_Treponema_2_presence_over_time_in_faeces_fill_OTU.pdf", height = 15, width = 25)
p
dev.off() #Resulting figure can be found under Supplementary Figure S13 of the manuscript
## png
## 2
ps1.1genus <- readRDS("./phyobjects/ps1.EcN.rds")
ps1.1genus@sam_data$Day_of_study_factor <- factor(ps1.1genus@sam_data$Day_of_study_factor, levels = c("Day_4", "Day_8", "Day_14", "Day_26", "Day_27", "Day_35", "Day_43", "Day_44", "Day_59", "Day_69", "Day_70"))
colnames(tax_table(ps1.1genus))
## [1] "Domain" "Phylum" "Class" "Order" "Family" "Genus"
tax_table(ps1.1genus) <- cbind(tax_table(ps1.1genus),
rownames(tax_table(ps1.1genus)))
colnames(tax_table(ps1.1genus)) <-
c("Domain", "Phylum", "Class", "Order", "Family", "Genus", "OTUID")
# convert to relative abundance
ps1.1genus <- microbiome::transform(ps1.1genus, "compositional")
# Select a single species within the Treponema genus:
#ps1.1genus <- subset_taxa(ps1.1genus, OTUID == "8138091913") #Treponema Succinifaciens
#ps1.1genus <- subset_taxa(ps1.1genus, OTUID == "8138091950") #Treponema porcinum
#ps1.1genus <- subset_taxa(ps1.1genus, OTUID == "8138091954") #Treponema parvum
#ps1.1genus <- subset_taxa(ps1.1genus, OTUID == "8138092035") #Treponema berlinense
ps1.1genus <- subset_taxa(ps1.1genus, OTUID == "8138091173" | OTUID == "813809349") #Candidatus Treponema suis
ps1.1genus <- subset_samples(ps1.1genus, Unique %in% c("yes"))
ps1.1genus <- subset_samples(ps1.1genus, Origin %in% c("faeces")) #, "caecum_digesta"))
ps1.1genus <- prune_taxa(taxa_sums(otu_table(ps1.1genus))>0, ps1.1genus)
getPalette = colorRampPalette(brewer.pal(ntaxa(ps1.1genus), "Paired"))
## Warning in brewer.pal(ntaxa(ps1.1genus), "Paired"): minimal value for n is 3, returning requested palette with 3 different levels
#barplot with single OTU
p <- plot_bar(ps1.1genus, x="Ear_tag", fill = "OTU")
p <- p + facet_wrap(~Day_of_study_factor + Origin + Treatment, scales = "free_x", nrow = 1)
p <- p + theme(text = element_text(size = 20), panel.background = element_rect(fill = "white",
colour = "lightgrey",
size = 0.5, linetype = "solid"),)
p <- p + scale_fill_manual(values = getPalette(ntaxa(ps1.1genus)))
p
pdf(file = "./figures/Barplot_candidatus_treponema_suis1_and_2_singleOTU_presence_over_time_fill_OTU_faeces.pdf", height = 15, width = 25)
p
dev.off() #Resulting figure can be found under Supplementary Figure S13 of the manuscript
## png
## 2
ps1.1genus <- readRDS("./phyobjects/ps1.EcN.rds")
ps1.1genus@sam_data$Day_of_study_factor <- factor(ps1.1genus@sam_data$Day_of_study_factor, levels = c("Day_4", "Day_8", "Day_14", "Day_26", "Day_27", "Day_35", "Day_43", "Day_44", "Day_59", "Day_69", "Day_70"))
# convert to relative abundance
ps1.1genus <- microbiome::transform(ps1.1genus, "compositional")
ps1.1genus <- subset_taxa(ps1.1genus, Genus == "Holdemanella")
ps1.1genus <- subset_samples(ps1.1genus, Unique %in% c("yes"))
ps1.1genus <- subset_samples(ps1.1genus, Origin %in% c("faeces"))
ps1.1genus <- subset_samples(ps1.1genus, Pre_or_post_weaning %in% c("Pre_weaning"))
ps1.1genus <- prune_taxa(taxa_sums(otu_table(ps1.1genus))>0, ps1.1genus)
getPalette = colorRampPalette(brewer.pal(ntaxa(ps1.1genus), "Paired"))
#barplot with single OTU
p <- plot_bar(ps1.1genus, x="Ear_tag", fill = "OTU")
p <- p + facet_wrap(~Day_of_study_factor + Origin + Treatment, scales = "free_x", nrow = 1)
p <- p + theme(text = element_text(size = 20), panel.background = element_rect(fill = "white",
colour = "lightgrey",
size = 0.5, linetype = "solid"),)
p <- p + scale_fill_manual(values = getPalette(ntaxa(ps1.1genus)))
p
pdf(file = "./figures/Barplot_Holdemanella_presence_over_time_in_faeces_pre-w_fill_OTU.pdf", height = 15, width = 25)
p
dev.off() #This figure was not used in the manuscript, but does contain interesting data.
## png
## 2
ps1.1genus <- readRDS("./phyobjects/ps1.EcN.rds")
ps1.1genus@sam_data$Day_of_study_factor <- factor(ps1.1genus@sam_data$Day_of_study_factor, levels = c("Day_4", "Day_8", "Day_14", "Day_26", "Day_27", "Day_35", "Day_43", "Day_44", "Day_59", "Day_69", "Day_70"))
# convert to relative abundance
ps1.1genus <- microbiome::transform(ps1.1genus, "compositional")
ps1.1genus <- subset_taxa(ps1.1genus, Genus == "Ruminococcaceae_UCG−014")
ps1.1genus <- subset_samples(ps1.1genus, Unique %in% c("yes"))
ps1.1genus <- subset_samples(ps1.1genus, Origin %in% c("faeces"))
ps1.1genus <- prune_taxa(taxa_sums(otu_table(ps1.1genus))>0, ps1.1genus)
getPalette = colorRampPalette(brewer.pal(ntaxa(ps1.1genus), "Paired"))
## Warning in brewer.pal(ntaxa(ps1.1genus), "Paired"): n too large, allowed maximum for palette Paired is 12
## Returning the palette you asked for with that many colors
#barplot with single OTU
p <- plot_bar(ps1.1genus, x="Ear_tag", fill = "OTU")
p <- p + facet_wrap(~Day_of_study_factor + Origin + Treatment, scales = "free_x", nrow = 1)
p <- p + theme(text = element_text(size = 20), panel.background = element_rect(fill = "white",
colour = "lightgrey",
size = 0.5, linetype = "solid"),)
p <- p + scale_fill_manual(values = getPalette(ntaxa(ps1.1genus)))
p
pdf(file = "./figures/Barplot_Ruminococcaceae_UCG−014_presence_over_time_in_faeces_fill_OTU.pdf", height = 15, width = 25)
p
dev.off() #This figure was not used in the manuscript, but does contain interesting data.
## png
## 2
ps1.1genus <- readRDS("./phyobjects/ps1.EcN.rds")
ps1.1genus@sam_data$Day_of_study_factor <- factor(ps1.1genus@sam_data$Day_of_study_factor, levels = c("Day_4", "Day_8", "Day_14", "Day_26", "Day_27", "Day_35", "Day_43", "Day_44", "Day_59", "Day_69", "Day_70"))
# convert to relative abundance
ps1.1genus <- microbiome::transform(ps1.1genus, "compositional")
ps1.1genus <- subset_taxa(ps1.1genus, Genus == "Terrisporobacter")
ps1.1genus <- subset_samples(ps1.1genus, Unique %in% c("yes"))
ps1.1genus <- subset_samples(ps1.1genus, Origin %in% c("faeces"))
ps1.1genus <- subset_samples(ps1.1genus, Pre_or_post_weaning %in% c("Pre_weaning"))
ps1.1genus <- prune_taxa(taxa_sums(otu_table(ps1.1genus))>0, ps1.1genus)
getPalette = colorRampPalette(brewer.pal(ntaxa(ps1.1genus), "Paired"))
## Warning in brewer.pal(ntaxa(ps1.1genus), "Paired"): minimal value for n is 3, returning requested palette with 3 different levels
#barplot with single OTU
p <- plot_bar(ps1.1genus, x="Ear_tag", fill = "OTU")
p <- p + facet_wrap(~Day_of_study_factor + Origin + Treatment, scales = "free_x", nrow = 1)
p <- p + theme(text = element_text(size = 20), panel.background = element_rect(fill = "white",
colour = "lightgrey",
size = 0.5, linetype = "solid"),)
p <- p + scale_fill_manual(values = getPalette(ntaxa(ps1.1genus)))
p
pdf(file = "./figures/Barplot_Terrisporobacter_presence_over_time_in_faeces_pre-w_fill_OTU.pdf", height = 15, width = 25)
p
dev.off() #This figure was not used in the manuscript, but does contain interesting data.
## png
## 2
Compare EcN abundance in NGS vs qPCR data
ps1 <- readRDS("./phyobjects/ps1.EcN.rds")
ps1.tested <- ps1 <- subset_samples(ps1, fcs_tested_qpcr %in% c("yes"))
ps1.tested
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 3022 taxa and 35 samples ]
## sample_data() Sample Data: [ 35 samples by 171 sample variables ]
## tax_table() Taxonomy Table: [ 3022 taxa by 6 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 3022 tips and 3021 internal nodes ]
ps1.tested.df <- as.data.frame(sample_data(ps1.tested))
p <- ggplot(ps1.tested.df,
mapping = aes(rel_abund_ecn_ct33, EcN_NGS_relative)) +
geom_point(size = 3, alpha = 0.9, colour = 'darkgreen') + scale_x_log10() + scale_y_log10() +
geom_smooth(method='lm', se = FALSE)
p
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 12 rows containing non-finite values (stat_smooth).
pdf(file = "./figures/Compare_EcN_abund_NGS_vs_qPCR_ct33.pdf", height = 10, width = 15)
p
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 12 rows containing non-finite values (stat_smooth).
dev.off()
## png
## 2
#Calculate r-squared:
x <- c(ps1.tested.df$rel_abund_ecn_ct33)
y <- c(ps1.tested.df$EcN_NGS_relative)
ps1.tested.df.calc_r2_merged <- data.frame(x, y)
summary(lm(x ~ y, data=ps1.tested.df.calc_r2_merged))$r.squared
## [1] 0.837213
res <- read.table("./input_data/ileum_data_volcano_plot_for_R_d27_ecn_padj.csv",
header = TRUE,
sep = ",",
stringsAsFactors = FALSE)
head(res)
## Gene log2FoldChange pvalue padj
## 1 C3 0.08798021 0.6818075 1
## 2 C5 0.16858111 0.7429596 1
## 3 DEFB1 -0.22286587 0.6403522 1
## 4 GAPDH -0.03078863 0.7210233 1
## 5 IL1B -0.89565748 0.3587843 1
## 6 IL8 -0.17655240 0.5781095 1
colnames(res) <- c("Gene","log2FoldChange","pvalue", "padj")
res$padj <- rownames(res)
res$padj <- p.adjust(res[,3], method = "fdr")
# Make a basic volcano plot
with(res, plot(log2FoldChange, -log10(pvalue), pch=20, main="Volcano plot", xlim=c(-3.2,2.8), ylim=c(0, 2.4)))
# Add colored points: red if padj<0.05, orange of log2FC>1, green if both)
with(subset(res, padj<.05 ), points(log2FoldChange, -log10(pvalue), pch=20, col="red"))
with(subset(res, abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col="orange"))
with(subset(res, padj<.05 & abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col="green"))
# Label points with the textxy function from the calibrate plot
library(calibrate)
## Warning: package 'calibrate' was built under R version 3.6.3
##
## Attaching package: 'calibrate'
## The following objects are masked from 'package:vegan':
##
## calibrate, rda
with(subset(res, padj<.5 & abs(log2FoldChange)>0.1), textxy(log2FoldChange, -log10(pvalue), labs=Gene, cex=.8))
# The pdf figure should be used to really look at the data, as it has better visibility
pdf(file = "./figures/ileum_data_volcano_plot_for_R_d27_ecn_padj_xy_limits.pdf", height = 10, width = 15)
# Make a basic volcano plot
with(res, plot(log2FoldChange, -log10(pvalue), pch=20, main="Volcano plot", xlim=c(-3.2,2.8), ylim=c(0, 2.4)))
# Add colored points: red if padj<0.05, orange of log2FC>1, green if both)
with(subset(res, padj<.05 ), points(log2FoldChange, -log10(pvalue), pch=20, col="red"))
with(subset(res, abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col="orange"))
with(subset(res, padj<.05 & abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col="green"))
# Label points with the textxy function from the calibrate plot
library(calibrate)
with(subset(res, pvalue<1 & abs(log2FoldChange)>1), textxy(log2FoldChange, -log10(pvalue), labs=Gene, cex=.8))
dev.off()
## png
## 2
res <- read.table("./input_data/ileum_data_volcano_plot_for_R_d44_ecn_padj.csv",
header = TRUE,
sep = ",",
stringsAsFactors = FALSE)
head(res)
## Gene log2FoldChange pvalue padj
## 1 C3 -0.15456308 0.5452612 1
## 2 C5 -0.28635917 0.4276373 1
## 3 DEFB1 -0.72634279 0.3188917 1
## 4 GAPDH -0.01462537 0.8921610 1
## 5 IL1B 0.33275775 0.3512406 1
## 6 IL8 0.02795401 0.9142406 1
colnames(res) <- c("Gene","log2FoldChange","pvalue", "padj")
res$padj <- rownames(res)
res$padj <- p.adjust(res[,3], method = "fdr")
# Make a basic volcano plot
with(res, plot(log2FoldChange, -log10(pvalue), pch=20, main="Volcano plot", xlim=c(-3.2,2.8), ylim=c(0, 2.4)))
# Add colored points: red if padj<0.05, orange of log2FC>1, green if both)
with(subset(res, padj<.05 ), points(log2FoldChange, -log10(pvalue), pch=20, col="red"))
with(subset(res, abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col="orange"))
with(subset(res, padj<.05 & abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col="green"))
# Label points with the textxy function from the calibrate plot
library(calibrate)
with(subset(res, padj<.5 & abs(log2FoldChange)>0.1), textxy(log2FoldChange, -log10(pvalue), labs=Gene, cex=.8))
# The pdf figure should be used to really look at the data, as it has better visibility
pdf(file = "./figures/ileum_data_volcano_plot_for_R_d44_ecn_padj_xy_limits.pdf", height = 10, width = 15)
# Make a basic volcano plot
with(res, plot(log2FoldChange, -log10(pvalue), pch=20, main="Volcano plot", xlim=c(-3.2,2.8), ylim=c(0, 2.4)))
# Add colored points: red if padj<0.05, orange of log2FC>1, green if both)
with(subset(res, padj<.05 ), points(log2FoldChange, -log10(pvalue), pch=20, col="red"))
with(subset(res, abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col="orange"))
with(subset(res, padj<.05 & abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col="green"))
# Label points with the textxy function from the calibrate plot
library(calibrate)
with(subset(res, pvalue<1 & abs(log2FoldChange)>1), textxy(log2FoldChange, -log10(pvalue), labs=Gene, cex=.8))
dev.off()
## png
## 2
res <- read.table("./input_data/ileum_data_volcano_plot_for_R_d27_vs_d44_ecn_padj.csv",
header = TRUE,
sep = ",",
stringsAsFactors = FALSE)
head(res)
## Gene log2FoldChange pvalue padj
## 1 C3 0.46283920 0.009418346 1
## 2 C5 0.74245387 0.015504338 1
## 3 DEFB1 -1.34201104 0.002481060 1
## 4 GAPDH 0.09427848 0.164278001 1
## 5 IL1B -0.09544802 0.849716378 1
## 6 IL8 0.79363040 0.000339299 1
colnames(res) <- c("Gene","log2FoldChange","pvalue", "padj")
res$padj <- rownames(res)
res$padj <- p.adjust(res[,3], method = "fdr")
# Make a basic volcano plot
with(res, plot(log2FoldChange, -log10(pvalue), pch=20, main="Volcano plot"))
# Add colored points: red if padj<0.05, orange of log2FC>1, green if both)
with(subset(res, padj<.05 ), points(log2FoldChange, -log10(pvalue), pch=20, col="red"))
with(subset(res, abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col="orange"))
with(subset(res, padj<.05 & abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col="green"))
# Label points with the textxy function from the calibrate plot
library(calibrate)
with(subset(res, padj<.05 & abs(log2FoldChange)>0.1), textxy(log2FoldChange, -log10(pvalue), labs=Gene, cex=.8))
pdf(file = "./figures/ileum_data_volcano_plot_for_R_d27_vs_d44_ecn_padj_xy_limits.pdf", height = 10, width = 15)
# Make a basic volcano plot
with(res, plot(log2FoldChange, -log10(pvalue), pch=20, main="Volcano plot"))
# Add colored points: red if padj<0.05, orange of log2FC>1, green if both)
with(subset(res, padj<.05 ), points(log2FoldChange, -log10(pvalue), pch=20, col="red"))
with(subset(res, abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col="orange"))
with(subset(res, padj<.05 & abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col="green"))
# Label points with the textxy function from the calibrate plot
library(calibrate)
with(subset(res, padj<.05 & abs(log2FoldChange)>0.1), textxy(log2FoldChange, -log10(pvalue), labs=Gene, cex=.8))
dev.off()
## png
## 2
# The pdf figure should be used to really look at the data, as it has better visibility
pdf(file = "./figures/ileum_data_volcano_plot_for_R_d27_vs_d44_ecn_padj_xy_limits_h20_w30.pdf", height = 20, width = 30)
# Make a basic volcano plot
with(res, plot(log2FoldChange, -log10(pvalue), pch=20, main="Volcano plot"))
# Add colored points: red if padj<0.05, orange of log2FC>1, green if both)
with(subset(res, padj<.05 ), points(log2FoldChange, -log10(pvalue), pch=20, col="red"))
with(subset(res, abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col="orange"))
with(subset(res, padj<.05 & abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col="green"))
# Label points with the textxy function from the calibrate plot
library(calibrate)
with(subset(res, padj<.05 & abs(log2FoldChange)>0.1), textxy(log2FoldChange, -log10(pvalue), labs=Gene, cex=.8))
dev.off()
## png
## 2
res <- read.table("./input_data/colon_data_volcano_plot_for_R_d27_padj.csv",
header = TRUE,
sep = ",",
stringsAsFactors = FALSE)
head(res)
## Gene log2FoldChange pvalue padj
## 1 C3 0.3053966 0.33290755 1
## 2 C5 0.9739564 0.02975623 1
## 3 IL1B 0.7899645 0.59146375 1
## 4 IL8 0.1766245 0.78980545 1
## 5 IL12A 0.6721809 0.17577574 1
## 6 IFNA -1.0590843 0.31255028 1
colnames(res) <- c("Gene","log2FoldChange","pvalue", "padj")
res$padj <- rownames(res)
res$padj <- p.adjust(res[,3], method = "fdr")
# Make a basic volcano plot
with(res, plot(log2FoldChange, -log10(pvalue), pch=20, main="Volcano plot", xlim=c(-3.2,2.8), ylim=c(0, 2.4)))
# Add colored points: red if padj<0.05, orange of log2FC>1, green if both)
with(subset(res, padj<.05 ), points(log2FoldChange, -log10(pvalue), pch=20, col="red"))
with(subset(res, abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col="orange"))
with(subset(res, padj<.05 & abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col="green"))
# Label points with the textxy function from the calibrate plot
library(calibrate)
with(subset(res, padj<.5 & abs(log2FoldChange)>0.1), textxy(log2FoldChange, -log10(pvalue), labs=Gene, cex=.8))
# The pdf figure should be used to really look at the data, as it has better visibility
pdf(file = "./figures/colon_data_volcano_plot_for_R_d27_padj_xy_limits.pdf", height = 10, width = 15)
# Make a basic volcano plot
with(res, plot(log2FoldChange, -log10(pvalue), pch=20, main="Volcano plot", xlim=c(-3.2,2.8), ylim=c(0, 2.4)))
# Add colored points: red if padj<0.05, orange of log2FC>1, green if both)
with(subset(res, padj<.05 ), points(log2FoldChange, -log10(pvalue), pch=20, col="red"))
with(subset(res, abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col="orange"))
with(subset(res, padj<.05 & abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col="green"))
# Label points with the textxy function from the calibrate plot
library(calibrate)
with(subset(res, pvalue<1 & abs(log2FoldChange)>1), textxy(log2FoldChange, -log10(pvalue), labs=Gene, cex=.8))
dev.off()
## png
## 2
res <- read.table("./input_data/colon_data_volcano_plot_for_R_d44_padj.csv",
header = TRUE,
sep = ",",
stringsAsFactors = FALSE)
head(res)
## Gene log2FoldChange pvalue padj
## 1 C3 -0.381683678 0.09031378 1
## 2 C5 -0.003133154 0.99435307 1
## 3 IL1B 0.045352502 0.95020697 1
## 4 IL8 0.465913302 0.31490536 1
## 5 IL12A 0.534419071 0.51276538 1
## 6 IFNA -0.254140818 0.66350955 1
colnames(res) <- c("Gene","log2FoldChange","pvalue", "padj")
res$padj <- rownames(res)
res$padj <- p.adjust(res[,3], method = "fdr")
# Make a basic volcano plot
with(res, plot(log2FoldChange, -log10(pvalue), pch=20, main="Volcano plot", xlim=c(-3.2,2.8), ylim=c(0, 2.4)))
# Add colored points: red if padj<0.05, orange of log2FC>1, green if both)
with(subset(res, padj<.05 ), points(log2FoldChange, -log10(pvalue), pch=20, col="red"))
with(subset(res, abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col="orange"))
with(subset(res, padj<.05 & abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col="green"))
# Label points with the textxy function from the calibrate plot
library(calibrate)
with(subset(res, padj<.5 & abs(log2FoldChange)>0.1), textxy(log2FoldChange, -log10(pvalue), labs=Gene, cex=.8))
# The pdf figure should be used to really look at the data, as it has better visibility
pdf(file = "./figures/colon_data_volcano_plot_for_R_d44_padj_xy_limits.pdf", height = 10, width = 15)
# Make a basic volcano plot
with(res, plot(log2FoldChange, -log10(pvalue), pch=20, main="Volcano plot", xlim=c(-3.2,2.8), ylim=c(0, 2.4)))
# Add colored points: red if padj<0.05, orange of log2FC>1, green if both)
with(subset(res, padj<.05 ), points(log2FoldChange, -log10(pvalue), pch=20, col="red"))
with(subset(res, abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col="orange"))
with(subset(res, padj<.05 & abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col="green"))
# Label points with the textxy function from the calibrate plot
library(calibrate)
with(subset(res, pvalue<1 & abs(log2FoldChange)>1), textxy(log2FoldChange, -log10(pvalue), labs=Gene, cex=.8))
dev.off()
## png
## 2
res <- read.table("./input_data/colon_data_volcano_plot_for_R_d27_vs_d44_padj.csv",
header = TRUE,
sep = ",",
stringsAsFactors = FALSE)
head(res)
## Gene log2FoldChange pvalue padj
## 1 C3 0.35658942 0.062154613 1
## 2 C5 0.75487339 0.024536378 1
## 3 IL1B -0.11063755 0.888882580 1
## 4 IL8 1.18235754 0.004382547 1
## 5 IL12A 0.06043495 0.896050835 1
## 6 IFNA -0.64101309 0.324858334 1
colnames(res) <- c("Gene","log2FoldChange","pvalue", "padj")
res$padj <- rownames(res)
res$padj <- p.adjust(res[,3], method = "fdr")
# Make a basic volcano plot
with(res, plot(log2FoldChange, -log10(pvalue), pch=20, main="Volcano plot"))
# Add colored points: red if padj<0.05, orange of log2FC>1, green if both)
with(subset(res, padj<.05 ), points(log2FoldChange, -log10(pvalue), pch=20, col="red"))
with(subset(res, abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col="orange"))
with(subset(res, padj<.05 & abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col="green"))
# Label points with the textxy function from the calibrate plot
library(calibrate)
with(subset(res, padj<.05 & abs(log2FoldChange)>0.1), textxy(log2FoldChange, -log10(pvalue), labs=Gene, cex=.8))
with(subset(res, abs(log2FoldChange)>1), textxy(log2FoldChange, -log10(pvalue), labs=Gene, cex=.8))
pdf(file = "./figures/colon_data_volcano_plot_for_R_d27_vs_d44_padj_xy_limits.pdf", height = 10, width = 15)
# Make a basic volcano plot
with(res, plot(log2FoldChange, -log10(pvalue), pch=20, main="Volcano plot"))
# Add colored points: red if padj<0.05, orange of log2FC>1, green if both)
with(subset(res, padj<.05 ), points(log2FoldChange, -log10(pvalue), pch=20, col="red"))
with(subset(res, abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col="orange"))
with(subset(res, padj<.05 & abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col="green"))
# Label points with the textxy function from the calibrate plot
library(calibrate)
with(subset(res, padj<.05 & abs(log2FoldChange)>0.1), textxy(log2FoldChange, -log10(pvalue), labs=Gene, cex=.8))
with(subset(res, abs(log2FoldChange)>1), textxy(log2FoldChange, -log10(pvalue), labs=Gene, cex=.8))
dev.off()
## png
## 2
# The pdf figure should be used to really look at the data, as it has better visibility
pdf(file = "./figures/colon_data_volcano_plot_for_R_d27_vs_d44_padj_xy_limits_h20_w30.pdf", height = 20, width = 30)
with(res, plot(log2FoldChange, -log10(pvalue), pch=20, main="Volcano plot"))
# Add colored points: red if padj<0.05, orange of log2FC>1, green if both)
with(subset(res, padj<.05 ), points(log2FoldChange, -log10(pvalue), pch=20, col="red"))
with(subset(res, abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col="orange"))
with(subset(res, padj<.05 & abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col="green"))
# Label points with the textxy function from the calibrate plot
library(calibrate)
with(subset(res, padj<.05 & abs(log2FoldChange)>0.1), textxy(log2FoldChange, -log10(pvalue), labs=Gene, cex=.8))
with(subset(res, abs(log2FoldChange)>1), textxy(log2FoldChange, -log10(pvalue), labs=Gene, cex=.8))
dev.off()
## png
## 2
# Here, the analysis of one of the immunological data files is showcased. All other immunological files that are present within the data repository were run with the same code, although with alternating parameters.
my_data <- read.table("./input_data/Flow_cytometry_NKT_PBMC_Memory_Ki67_EcN.csv",
header = TRUE,
sep = ",",
stringsAsFactors = FALSE)
boxplot(score ~ group*time,
col=c("white","lightgray"),my_data)
fm1 <- lme(score ~ time + group + time * group, data = my_data, method = "REML", random = ~ 1 | department, na.action=na.omit)
anova(fm1)
## numDF denDF F-value p-value
## (Intercept) 1 52 2649.0293 <.0001
## time 3 52 114.8167 <.0001
## group 1 52 0.7161 0.4013
## time:group 3 52 2.8845 0.0444
lme.pre.pd1 <- lme(score ~ time + group + time * group,
data = my_data, method = "REML",
random = ~ 1 | department, na.action=na.omit)
summary(lme.pre.pd1)
## Linear mixed-effects model fit by REML
## Data: my_data
## AIC BIC logLik
## 424.6632 444.7366 -202.3316
##
## Random effects:
## Formula: ~1 | department
## (Intercept) Residual
## StdDev: 0.0002292705 8.246591
##
## Fixed effects: score ~ time + group + time * group
## Value Std.Error DF t-value p-value
## (Intercept) 16.52500 2.915610 52 5.667767 0.0000
## timet2 52.51250 4.123296 52 12.735565 0.0000
## timet3 39.40000 4.123296 52 9.555463 0.0000
## timet4 57.30000 4.123296 52 13.896651 0.0000
## groupEcN 9.03214 4.268016 52 2.116239 0.0391
## timet2:groupEcN -14.19464 5.934436 52 -2.391911 0.0204
## timet3:groupEcN -12.60714 5.934436 52 -2.124405 0.0384
## timet4:groupEcN -15.64464 5.934436 52 -2.636248 0.0110
## Correlation:
## (Intr) timet2 timet3 timet4 grpEcN tm2:EN tm3:EN
## timet2 -0.707
## timet3 -0.707 0.500
## timet4 -0.707 0.500 0.500
## groupEcN -0.683 0.483 0.483 0.483
## timet2:groupEcN 0.491 -0.695 -0.347 -0.347 -0.719
## timet3:groupEcN 0.491 -0.347 -0.695 -0.347 -0.719 0.517
## timet4:groupEcN 0.491 -0.347 -0.347 -0.695 -0.719 0.517 0.517
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -2.002342497 -0.511250204 0.003031555 0.389121820 2.482844067
##
## Number of Observations: 63
## Number of Groups: 4
lme.pre.pd1
## Linear mixed-effects model fit by REML
## Data: my_data
## Log-restricted-likelihood: -202.3316
## Fixed: score ~ time + group + time * group
## (Intercept) timet2 timet3 timet4 groupEcN
## 16.525000 52.512500 39.400000 57.300000 9.032143
## timet2:groupEcN timet3:groupEcN timet4:groupEcN
## -14.194643 -12.607143 -15.644643
##
## Random effects:
## Formula: ~1 | department
## (Intercept) Residual
## StdDev: 0.0002292705 8.246591
##
## Number of Observations: 63
## Number of Groups: 4
anova(lme.pre.pd1)
## numDF denDF F-value p-value
## (Intercept) 1 52 2649.0293 <.0001
## time 3 52 114.8167 <.0001
## group 1 52 0.7161 0.4013
## time:group 3 52 2.8845 0.0444
# There is no need for temporal autocorrelation (p-value not significant)
lme.pre.pd2 <- lme(score ~ time + group + time * group,
data = my_data, method = "REML",
cor = corAR1(), random = ~ 1 | id | department, na.action=na.omit)
lme.pre.pd2
## Linear mixed-effects model fit by REML
## Data: my_data
## Log-restricted-likelihood: -200.1735
## Fixed: score ~ time + group + time * group
## (Intercept) timet2 timet3 timet4 groupEcN
## 16.487635 52.514678 39.008222 57.567593 8.729937
## timet2:groupEcN timet3:groupEcN timet4:groupEcN
## -13.400865 -12.131159 -15.449448
##
## Random effects:
## Formula: ~1 | id | department
## Structure: General positive-definite, Log-Cholesky parametrization
## StdDev Corr
## (Intercept) 2.952821e-04 (Intr)
## 1 | idTRUE 5.081642e-11 0
## Residual 8.106883e+00
##
## Correlation Structure: AR(1)
## Formula: ~1 | department
## Parameter estimate(s):
## Phi
## -0.302995
## Number of Observations: 63
## Number of Groups: 4
anova(lme.pre.pd1, lme.pre.pd2)
## Model df AIC BIC logLik Test L.Ratio p-value
## lme.pre.pd1 1 10 424.6632 444.7366 -202.3316
## lme.pre.pd2 2 13 426.3469 452.4422 -200.1735 1 vs 2 4.316316 0.2293
# Rest of code
aov.pd.ls1 <- anova(lme.pre.pd1)
aov.pd.ls1
## numDF denDF F-value p-value
## (Intercept) 1 52 2649.0293 <.0001
## time 3 52 114.8167 <.0001
## group 1 52 0.7161 0.4013
## time:group 3 52 2.8845 0.0444
res.aov2 <- aov(score ~ group + time, data = my_data)
summary(res.aov2)
## Df Sum Sq Mean Sq F value Pr(>F)
## group 1 8 8 0.109 0.743
## time 3 23465 7822 104.800 <2e-16 ***
## Residuals 58 4329 75
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 1 observation deleted due to missingness
res.aov3 <- aov(score ~ group + time + group:time, data = my_data)
summary(res.aov3)
## Df Sum Sq Mean Sq F value Pr(>F)
## group 1 8 8 0.120 0.7309
## time 3 23465 7822 115.016 <2e-16 ***
## group:time 3 588 196 2.885 0.0438 *
## Residuals 55 3740 68
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 1 observation deleted due to missingness
TukeyHSD(res.aov2)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = score ~ group + time, data = my_data)
##
## $group
## diff lwr upr p adj
## EcN-Control -0.7184476 -5.076465 3.63957 0.7425918
##
## $time
## diff lwr upr p adj
## t2-t1 45.74020 37.527410 53.952987 0.0000000
## t3-t1 33.42145 25.208660 41.634237 0.0000000
## t4-t1 49.80270 41.589910 58.015487 0.0000000
## t3-t2 -12.31875 -20.397989 -4.239511 0.0009148
## t4-t2 4.06250 -4.016739 12.141739 0.5478712
## t4-t3 16.38125 8.302011 24.460489 0.0000087
#lmer (lme4)
fit1 <- lmer(score ~ group + time + (1 | department), REML=F, data = my_data)
## boundary (singular) fit: see ?isSingular
summary(fit1)
## Linear mixed model fit by maximum likelihood ['lmerMod']
## Formula: score ~ group + time + (1 | department)
## Data: my_data
##
## AIC BIC logLik deviance df.resid
## 459.3 474.3 -222.6 445.3 56
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.2848 -0.6334 -0.2023 0.6662 2.7628
##
## Random effects:
## Groups Name Variance Std.Dev.
## department (Intercept) 0.00 0.000
## Residual 68.71 8.289
## Number of obs: 63, groups: department, 4
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 21.561 2.352 9.167
## groupEcN -1.759 2.090 -0.842
## timet2 45.775 2.980 15.361
## timet3 33.456 2.980 11.227
## timet4 49.837 2.980 16.724
##
## Correlation of Fixed Effects:
## (Intr) grpEcN timet2 timet3
## groupEcN -0.415
## timet2 -0.644 -0.023
## timet3 -0.644 -0.023 0.516
## timet4 -0.644 -0.023 0.516 0.516
## convergence code: 0
## boundary (singular) fit: see ?isSingular
drop1(fit1, test = "Chisq")
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## Single term deletions
##
## Model:
## score ~ group + time + (1 | department)
## npar AIC LRT Pr(Chi)
## <none> 459.27
## group 1 457.98 0.705 0.4012
## time 3 570.42 117.150 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
fit2 <- lmer(score ~ group + time + (1 + time | id), REML=F, data = my_data)
## boundary (singular) fit: see ?isSingular
summary(fit2)
## Linear mixed model fit by maximum likelihood ['lmerMod']
## Formula: score ~ group + time + (1 + time | id)
## Data: my_data
##
## AIC BIC logLik deviance df.resid
## 474.2 508.5 -221.1 442.2 47
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.5995 -0.6195 -0.1262 0.7092 2.8514
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## id (Intercept) 7.733 2.781
## timet2 1.122 1.059 -1.00
## timet3 3.870 1.967 -1.00 1.00
## timet4 6.444 2.539 1.00 -1.00 -1.00
## Residual 58.751 7.665
## Number of obs: 63, groups: id, 8
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 21.471 2.388 8.990
## groupEcN -1.819 1.933 -0.941
## timet2 45.895 2.783 16.489
## timet3 33.576 2.844 11.804
## timet4 49.957 2.900 17.224
##
## Correlation of Fixed Effects:
## (Intr) grpEcN timet2 timet3
## groupEcN -0.376
## timet2 -0.637 -0.024
## timet3 -0.670 -0.024 0.530
## timet4 -0.431 -0.023 0.446 0.401
## convergence code: 0
## boundary (singular) fit: see ?isSingular
drop1(fit2, test = "Chisq")
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## Single term deletions
##
## Model:
## score ~ group + time + (1 + time | id)
## npar AIC LRT Pr(Chi)
## <none> 474.18
## group 1 473.06 0.877 0.3491
## time 3 501.07 32.887 3.403e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
version
## _
## platform x86_64-w64-mingw32
## arch x86_64
## os mingw32
## system x86_64, mingw32
## status
## major 3
## minor 6.1
## year 2019
## month 07
## day 05
## svn rev 76782
## language R
## version.string R version 3.6.1 (2019-07-05)
## nickname Action of the Toes
sessionInfo()
## R version 3.6.1 (2019-07-05)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19043)
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=English_Netherlands.1252 LC_CTYPE=English_Netherlands.1252
## [3] LC_MONETARY=English_Netherlands.1252 LC_NUMERIC=C
## [5] LC_TIME=English_Netherlands.1252
##
## attached base packages:
## [1] splines parallel stats4 stats graphics grDevices utils
## [8] datasets methods base
##
## other attached packages:
## [1] calibrate_1.7.7 gamlss_5.1-7
## [3] gamlss.dist_5.1-7 gamlss.data_5.1-4
## [5] datarium_0.1.0 rstatix_0.6.0
## [7] forcats_0.5.0 stringr_1.4.0
## [9] readr_1.3.1 tidyr_1.1.1
## [11] tibble_3.0.3 tidyverse_1.3.0
## [13] corrplot_0.84 metamicrobiomeR_1.1
## [15] ggvegan_0.1-0 psych_2.0.7
## [17] DESeq2_1.26.0 SummarizedExperiment_1.16.1
## [19] DelayedArray_0.12.3 BiocParallel_1.20.1
## [21] matrixStats_0.56.0 Biobase_2.46.0
## [23] GenomicRanges_1.38.0 GenomeInfoDb_1.22.1
## [25] IRanges_2.20.2 S4Vectors_0.24.4
## [27] BiocGenerics_0.32.0 DT_0.13
## [29] propr_4.2.6 CoDaSeq_0.99.6
## [31] car_3.0-8 carData_3.0-4
## [33] zCompositions_1.3.4 truncnorm_1.0-8
## [35] NADA_1.6-1.1 compositions_2.0-0
## [37] ALDEx2_1.18.0 RColorBrewer_1.1-2
## [39] viridis_0.5.1 viridisLite_0.3.0
## [41] purrr_0.3.4 data.table_1.12.8
## [43] multcompView_0.1-8 multcomp_1.4-13
## [45] TH.data_1.0-10 MASS_7.3-51.4
## [47] mvtnorm_1.1-1 emmeans_1.4.8
## [49] picante_1.8.2 vegan_2.5-6
## [51] permute_0.9-5 Hmisc_4.4-0
## [53] Formula_1.2-3 survival_3.1-12
## [55] lattice_0.20-38 decontam_1.6.0
## [57] pheatmap_1.0.12 apeglm_1.8.0
## [59] sciplot_1.2-0 lme4_1.1-23
## [61] Matrix_1.2-17 nlme_3.1-140
## [63] ggrepel_0.8.2 knitr_1.29
## [65] scales_1.1.1 reshape2_1.4.4
## [67] ape_5.4 dplyr_1.0.1
## [69] plyr_1.8.6 ggpubr_0.4.0
## [71] microbiomeutilities_0.99.02 microbiome_2.1.26
## [73] ggplot2_3.3.2 phyloseq_1.30.0
##
## loaded via a namespace (and not attached):
## [1] tidyselect_1.1.0 RSQLite_2.2.0 AnnotationDbi_1.48.0
## [4] htmlwidgets_1.5.1 grid_3.6.1 Rtsne_0.15
## [7] munsell_0.5.0 codetools_0.2-16 statmod_1.4.34
## [10] withr_2.2.0 colorspace_1.4-1 rstudioapi_0.11
## [13] robustbase_0.93-6 bayesm_3.1-4 ggsignif_0.6.0
## [16] labeling_0.3 bbmle_1.0.23.1 GenomeInfoDbData_1.2.2
## [19] mnormt_1.5-7 farver_2.0.3 bit64_4.0.2
## [22] rhdf5_2.30.1 coda_0.19-3 vctrs_0.3.2
## [25] generics_0.0.2 xfun_0.16 R6_2.4.1
## [28] locfit_1.5-9.4 bitops_1.0-6 assertthat_0.2.1
## [31] nnet_7.3-12 gtable_0.3.0 sandwich_2.5-1
## [34] rlang_0.4.7 genefilter_1.68.0 acepack_1.4.1
## [37] broom_0.7.0 checkmate_2.0.0 modelr_0.1.8
## [40] yaml_2.2.1 abind_1.4-5 backports_1.1.8
## [43] tensorA_0.36.1 tools_3.6.1 ellipsis_0.3.1
## [46] biomformat_1.14.0 Rcpp_1.0.5 base64enc_0.1-3
## [49] zlibbioc_1.32.0 RCurl_1.98-1.2 rpart_4.1-15
## [52] zoo_1.8-8 haven_2.3.1 cluster_2.1.0
## [55] fs_1.4.1 magrittr_1.5 openxlsx_4.1.5
## [58] reprex_0.3.0 hms_0.5.3 evaluate_0.14
## [61] xtable_1.8-4 XML_3.99-0.3 rio_0.5.16
## [64] emdbook_1.3.12 jpeg_0.1-8.1 readxl_1.3.1
## [67] gridExtra_2.3 compiler_3.6.1 bdsmatrix_1.3-4
## [70] crayon_1.3.4 minqa_1.2.4 htmltools_0.5.0
## [73] mgcv_1.8-28 geneplotter_1.64.0 lubridate_1.7.9
## [76] DBI_1.1.0 dbplyr_1.4.4 boot_1.3-22
## [79] ade4_1.7-15 cli_2.0.2 gdata_2.18.0
## [82] igraph_1.2.5 pkgconfig_2.0.3 numDeriv_2016.8-1.1
## [85] foreign_0.8-71 xml2_1.3.2 foreach_1.5.0
## [88] annotate_1.64.0 multtest_2.42.0 XVector_0.26.0
## [91] estimability_1.3 rvest_0.3.6 digest_0.6.25
## [94] Biostrings_2.54.0 rmarkdown_2.3 cellranger_1.1.0
## [97] htmlTable_2.0.1 curl_4.3 gtools_3.8.2
## [100] nloptr_1.2.2.1 lifecycle_0.2.0 jsonlite_1.6.1
## [103] Rhdf5lib_1.8.0 fansi_0.4.1 pillar_1.4.6
## [106] httr_1.4.2 DEoptimR_1.0-8 glue_1.4.1
## [109] zip_2.0.4 png_0.1-7 iterators_1.0.12
## [112] bit_4.0.4 stringi_1.4.6 blob_1.2.1
## [115] latticeExtra_0.6-29 memoise_1.1.0