This HTML document contains the R-code that was used to perform analyses and create figures for the corresponding manuscript. At the end of the document the R version and package versions can be found. Should one want to run this R project in R-studio, it is advised to use the same R version and package versions as have been used here.

1. Load R packages

#Warnings and messages have been inactivated for this markdown
library(phyloseq)
library(microbiome)
library(microbiomeutilities)
library(ggplot2)
library(ggpubr)
library(plyr)
library(dplyr)
library(ape)
library(reshape2)
library(scales)
library(knitr)
library(ggrepel)
library(nlme)
library(lme4)
library(sciplot)
library(apeglm)
library(pheatmap)
library(decontam)
library(Hmisc)
library(picante)
library(emmeans)
library(multcomp)
library(multcompView)
library(data.table) 
library(purrr) 
library(viridis) 
library(RColorBrewer)
library(ALDEx2)
library(compositions)
library(zCompositions)
library(CoDaSeq)
library(propr)
library(vegan)
library(DT)
library(DESeq2)
library(psych)
library(car)
library(ggvegan)
library(metamicrobiomeR)
library(corrplot)
library(tidyverse)
library(rstatix)
library(datarium)
library(tidyverse)
library(ggpubr)

2. Directory structure (run only once)

# Create Folders as following

# Figures 
dir.create("figures")  

# Phyloseq objects  
dir.create("phyobjects")  

# Phyloseq objects  
dir.create("output_data")  

3. Data input and subsetting

3.1. Loading data

Loading Illumina sequencing data of library 1-14. Data was run through NGTax 2.0 and SILVA database version 132 was used as reference database.

Create phyloseq object

# create phyloseq object of all libraries
ps <- read_phyloseq(otu.file = "./input_data/Galaxy58-[NG-Tax__porcine_study_all_samples_eightrun_length70bp_11march2020].biom1", 
                    taxonomy.file = NULL, 
                    metadata.file = "./input_data/Metadata_porcine_study_june_2020.csv", 
                    type = "biom")
## Time to complete depends on OTU file size
# create tree object
treefile <- read.tree("./input_data/all_otus_eightrun_length70bp_11march2020.tre")
# merge tree and phyloseq object
ps1.all <- merge_phyloseq(ps,treefile)
print(ps1.all)
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 4157 taxa and 490 samples ]
## sample_data() Sample Data:       [ 490 samples by 171 sample variables ]
## tax_table()   Taxonomy Table:    [ 4157 taxa by 6 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 4157 tips and 4156 internal nodes ]
# export taxonomy table (for Windows)
taxonomy_table_ps1 <- as.data.frame(ps1.all@tax_table)
write.csv(taxonomy_table_ps1, file = "./output_data/taxonomy_table_ps1.csv", fileEncoding = "UTF-16LE") 
ntaxa(ps1.all)
## [1] 4157

Remove pattern in taxa names

# remove pattern in taxa names
tax_table(ps1.all)[,colnames(tax_table(ps1.all))] <- gsub(tax_table(ps1.all)[,colnames(tax_table(ps1.all))],pattern="[a-z]__",replacement="")

Export phyloseq object

# this is the raw, unfiltered phyloseq file
saveRDS(ps1.all, "./phyobjects/ps1_all.rds")
ps1.all <- readRDS("./phyobjects/ps1_all.rds")

# create new object to work with
ps1 <- ps1.all

3.2. Cleaning data

Removing mitochondrial and chloroplast reads

# replace empty fields by "Unmatched_level" 
# this is needed because otherwise the microbiome::aggregate_taxa() function will not run
tax_table(ps1)[tax_table(ps1)[,"Phylum"]== "","Phylum"] <- "Unmatched_phylum"
tax_table(ps1)[tax_table(ps1)[,"Class"]== "","Class"] <- "Unmatched_class"
tax_table(ps1)[tax_table(ps1)[,"Order"]== "","Order"] <- "Unmatched_order"
tax_table(ps1)[tax_table(ps1)[,"Family"]== "","Family"] <- "Unmatched_family"
tax_table(ps1)[tax_table(ps1)[,"Genus"]== "","Genus"] <- "Unmatched_genus"

# remove mitochondria and chloroplasts
ps1 <- subset_taxa(ps1, Family != "Mitochondria")
ps1 <- subset_taxa(ps1, Class != "Chloroplast") #No Chloroplasts found in class Chloroplasts

ps1.mitoch <- subset_taxa(ps1.all, Family == "Mitochondria")
ps1.mitoch <- prune_samples(sample_sums(ps1.mitoch)>0,ps1.mitoch) 
sort(sample_sums(ps1.mitoch))
##     m1338     m3041     m3019     m3073     m2207 m3023ctrl     m3012     m3013 
##       198       242       268       278       344       379       829      1191 
##     m1318     m3024     m3037     m3025     m3018     m3040     m2206     m3015 
##      2110      2731      4951      5077      5363      9682     20914     21652 
##     m1323     m2194     m3021 
##     23240     33679     72570
# There are however Oxyphotobacteria (Dutch translation: blauwalgen) present in the dataset, which are not removed from the dataset:
ps1.chloro <- subset_taxa(ps1.all, Order == "Chloroplast")
ps1.chloro <- prune_samples(sample_sums(ps1.chloro)>0,ps1.chloro) 
sort(sample_sums(ps1.chloro))
##  water.blank.9 water.blank.13          m1330          m2190          m2207 
##              2             36            237            259            277 
##          m1398          m1356          m1333          m3041          m3026 
##            309            546            607            700            725 
##          m3073          m3013          m2232          m3036          m3019 
##            726            733           1208           1430           2380 
##          m1318          m3018          m3024          m3025          m3012 
##           2832           3166           3246           3417           4065 
##          m2206          m3015          m3040          m3037          m2194 
##          10324          11572          11760          18781          49513 
##          m1323          m3021 
##          60720          81622
print(ps1)
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 4123 taxa and 490 samples ]
## sample_data() Sample Data:       [ 490 samples by 171 sample variables ]
## tax_table()   Taxonomy Table:    [ 4123 taxa by 6 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 4123 tips and 4122 internal nodes ]
ntaxa(ps1.all)-ntaxa(ps1) # 34 ASVs removed which were belonging to mitochondria
## [1] 34

3.3. Decontaminating dataset

Remove contaminant reads, identified by visual inspection of each ASV individually using correlation plots [DNA_reading] vs rel. ASV abundance. Resulted in 8 ASVs identified as contaminants.

# remove samples that have no reads. Does not do anything as in this dataset there are reads in every sample:
psx <- prune_samples(sample_sums(ps1)>0,ps1) 

# add higher taxa names
psx.tax <- as.data.frame(psx@tax_table)
psx.tax$OTU <- rownames(psx.tax)

# import contaminant OTU table
visContam <- read.delim("./input_data/Contaminant_OTU_by_visual_identification.txt",header=T)

psx.tax2 <- psx.tax 
rownames(psx.tax2) <- NULL # reset rownames for subset based on index
psx.tax2$contam <- rownames(psx.tax2) %in% visContam$OTU
table(psx.tax2$contam) # works: 8 OTUs T, rest F
## 
## FALSE  TRUE 
##  4115     8
rownames(psx.tax2) <- psx.tax2$OTU # restore OTU as rownames
psx.tax2$reads <- taxa_sums(psx)

# calculate % contaminant reads
contam.otu2 <- subset(psx.tax2, contam == "TRUE")
(sum(contam.otu2$reads) / sum(abundances(psx)))
## [1] 0.000306176
# 0.0306% of all reads

# subset phyloseq to contaminant OTUs
psx.sub <- subset_taxa(psx, psx.tax2$contam)

ps1.decontam <- prune_taxa(!psx.tax2$contam, ps1) 
ps1.decontam
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 4115 taxa and 490 samples ]
## sample_data() Sample Data:       [ 490 samples by 171 sample variables ]
## tax_table()   Taxonomy Table:    [ 4115 taxa by 6 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 4115 tips and 4114 internal nodes ]
ps1.decontam <- prune_samples(sample_sums(ps1.decontam)>0, ps1.decontam)
ps1.decontam # 4115 taxa, 489 samples
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 4115 taxa and 489 samples ]
## sample_data() Sample Data:       [ 489 samples by 171 sample variables ]
## tax_table()   Taxonomy Table:    [ 4115 taxa by 6 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 4115 tips and 4114 internal nodes ]
# samples with 0 reads after removing contaminants: 1 (water.blank.10)
# (code in following 2 lines only runs if not running prune_samples command above, used to identify removed samples)
#sample0 <- subset_samples(ps1.decontam, sample_sums(ps1.decontam)==0)
#sample_data(sample0)

ntaxa(ps1)-ntaxa(ps1.decontam)
## [1] 8
# 8 OTUs removed (resulting from the visual identification of contaminants)

sum(abundances(ps1)) - sum(abundances(ps1.decontam))
## [1] 26819
# 26819 reads removed.

3.4. Export phyloseq objects

# dataset without mitochondrial/chloroplast reads, but with contaminants
saveRDS(ps1, "./phyobjects/ps1.rds")
ps1 <- readRDS("./phyobjects/ps1.rds")

# decontaminated dataset
saveRDS(ps1.decontam, "./phyobjects/ps1.decontam.rds")
ps1.decontam <- readRDS("./phyobjects/ps1.decontam.rds")

3.5. Subsetting

# all following objects are without contaminant ASVs:
ps1.unique <- subset_samples(ps1.decontam, Unique == "yes")
ps1.noROM <- subset_samples(ps1.unique, Treatment != "ROM")
ps1.EcN <- subset_samples(ps1.noROM, Treatment != "Y_glucan")
ps1.EcN <- prune_taxa(taxa_sums(otu_table(ps1.EcN))>0, ps1.EcN)
ps1.faeces <- subset_samples(ps1.EcN, Origin == "faeces")
ps1.faeces <- prune_taxa(taxa_sums(otu_table(ps1.faeces))>0, ps1.faeces)
ps1.faeces.pre <- subset_samples(ps1.faeces, Pre_or_post_weaning == "Pre_weaning")
ps1.faeces.pre <- prune_taxa(taxa_sums(otu_table(ps1.faeces.pre))>0, ps1.faeces.pre)
ps1.faeces.post <- subset_samples(ps1.faeces, Pre_or_post_weaning == "Post_weaning")
ps1.faeces.post <- prune_taxa(taxa_sums(otu_table(ps1.faeces.post))>0, ps1.faeces.post)
ps1.digesta <- subset_samples(ps1.EcN, Origin %in% c("jejunum_digesta", "ileum_digesta", "caecum_digesta"))
ps1.digesta <- prune_taxa(taxa_sums(otu_table(ps1.digesta))>0, ps1.digesta)
ps1.jejunum.digesta <- subset_samples(ps1.EcN, Origin %in% c("jejunum_digesta"))
ps1.jejunum.digesta <- prune_taxa(taxa_sums(otu_table(ps1.jejunum.digesta))>0, ps1.jejunum.digesta)
ps1.ileum.digesta <- subset_samples(ps1.EcN, Origin %in% c("ileum_digesta"))
ps1.ileum.digesta <- prune_taxa(taxa_sums(otu_table(ps1.ileum.digesta))>0, ps1.ileum.digesta)
ps1.caecum.digesta <- subset_samples(ps1.EcN, Origin %in% c("caecum_digesta"))
ps1.caecum.digesta <- prune_taxa(taxa_sums(otu_table(ps1.caecum.digesta))>0, ps1.caecum.digesta)
ps1.swabs <- subset_samples(ps1.faeces, Swab %in% c("swab"))
ps1.swabs <- prune_taxa(taxa_sums(otu_table(ps1.swabs))>0, ps1.swabs)
ps1.no.swabs <- subset_samples(ps1.faeces, Swab %in% c("no_swab"))
ps1.no.swabs <- prune_taxa(taxa_sums(otu_table(ps1.no.swabs))>0, ps1.no.swabs)
ps1.enterobacteriaceae <- subset_taxa(ps1.EcN, Family == "Enterobacteriaceae")
print(ps1.faeces) # 287 samples
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 2305 taxa and 286 samples ]
## sample_data() Sample Data:       [ 286 samples by 171 sample variables ]
## tax_table()   Taxonomy Table:    [ 2305 taxa by 6 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 2305 tips and 2304 internal nodes ]
# filter out archaea and place in phyloseq object
ps1.archaea <- subset_taxa(ps1.EcN, Phylum == "Euryarchaeota")
ps1.archaea
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 28 taxa and 420 samples ]
## sample_data() Sample Data:       [ 420 samples by 171 sample variables ]
## tax_table()   Taxonomy Table:    [ 28 taxa by 6 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 28 tips and 27 internal nodes ]

3.6. Subsetting for quality checks

#   negative controls - includes contaminant reads (ps1)
ps1.contr <- subset_samples(ps1, Origin %in% c("blank"))
ps1.contr <- prune_taxa(taxa_sums(otu_table(ps1.contr))>0, ps1.contr)
print(ps1.contr)
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 202 taxa and 14 samples ]
## sample_data() Sample Data:       [ 14 samples by 171 sample variables ]
## tax_table()   Taxonomy Table:    [ 202 taxa by 6 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 202 tips and 201 internal nodes ]
# positive controls (mocks) - with contaminant reads (ps1)
ps1.mock <- subset_samples(ps1, Origin %in% c("mock.3","mock.4"))
ps1.mock <- prune_taxa(taxa_sums(otu_table(ps1.mock))>0, ps1.mock)
print(ps1.mock)
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 112 taxa and 26 samples ]
## sample_data() Sample Data:       [ 26 samples by 171 sample variables ]
## tax_table()   Taxonomy Table:    [ 112 taxa by 6 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 112 tips and 111 internal nodes ]
# technical duplicates (PCR) - includes contaminant reads (ps1)
ps1.tech <- subset_samples(ps1, Description %in% c("m1858", "m1858R1","m1858R2","m1858R3", "m2158", "m2158R1", "m2158R2", "m2158R3", "m169", "m169R1", "m169R2", "m169R3", "m169R4", "m169R5", "m169R6", "m924", "m924R1", "m924R2", "m924R3", "m924R4", "m924R5", "m924R6"))
ps1.tech <- prune_taxa(taxa_sums(otu_table(ps1.tech))>0, ps1.tech)
print(ps1.tech)
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 345 taxa and 22 samples ]
## sample_data() Sample Data:       [ 22 samples by 171 sample variables ]
## tax_table()   Taxonomy Table:    [ 345 taxa by 6 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 345 tips and 344 internal nodes ]
#   subset mitochondrial sequences
ps1.mit <- subset_taxa(ps1.all, Family == "Mitochondria")
print(ps1.mit)
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 34 taxa and 490 samples ]
## sample_data() Sample Data:       [ 490 samples by 171 sample variables ]
## tax_table()   Taxonomy Table:    [ 34 taxa by 6 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 34 tips and 33 internal nodes ]
# subset chloroplast sequences
#ps1.chloro <- subset_taxa(ps1.all, Class == "Chloroplast") #does not run, since there do not seem to be chloroplast reads present

3.7. Save subsets

Saving subsets in compressed formats (RDS).

# with contaminants
saveRDS(ps1.decontam, "./phyobjects/ps1.decontam.rds")
saveRDS(ps1.unique, "./phyobjects/ps1.unique.rds")
saveRDS(ps1.all, "./phyobjects/ps1.all.rds")
saveRDS(ps1.tech, "./phyobjects/ps1.tech.rds")
saveRDS(ps1.contr, "./phyobjects/ps1.contr.rds")
saveRDS(ps1.mock, "./phyobjects/ps1.mock.rds")
saveRDS(ps1.mit, "./phyobjects/ps1.mit.rds")
saveRDS(ps1.faeces, "./phyobjects/ps1.faeces.rds")
saveRDS(ps1.faeces.pre, "./phyobjects/ps1.faeces.pre.rds")
saveRDS(ps1.faeces.post, "./phyobjects/ps1.faeces.post.rds")
saveRDS(ps1.EcN, "./phyobjects/ps1.EcN.rds")
saveRDS(ps1.digesta, "./phyobjects/ps1.digesta.rds")
saveRDS(ps1.jejunum.digesta, "./phyobjects/ps1.jejunum.digesta.rds")
saveRDS(ps1.ileum.digesta, "./phyobjects/ps1.ileum.digesta.rds") 
saveRDS(ps1.caecum.digesta, "./phyobjects/ps1.caecum.digesta.rds")
saveRDS(ps1.swabs, "./phyobjects/ps1.swabs.rds")
saveRDS(ps1.no.swabs, "./phyobjects/ps1.no.swabs.rds")
saveRDS(ps1.archaea, "./phyobjects/ps1.archaea.rds")
saveRDS(ps1.enterobacteriaceae, "./phyobjects/ps1.enterobacteriaceae.rds")

# load full datasets from RDS
ps1.all <- readRDS("./phyobjects/ps1.all.rds")

3.8. Exploring specs

Exploring the stats of the datasets: # reads, ASVs, nr of genera, etc.

sum(sample_sums(ps1.all)) # raw phyloseq
## [1] 87799119
sum(sample_sums(ps1)) # excl mitrochondria/chloroplast, incl contaminants
## [1] 87593421
sum(sample_sums(ps1.decontam)) # excl mitrochondria/chloroplast, excl contaminants
## [1] 87566602
print(ps1)
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 4123 taxa and 490 samples ]
## sample_data() Sample Data:       [ 490 samples by 171 sample variables ]
## tax_table()   Taxonomy Table:    [ 4123 taxa by 6 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 4123 tips and 4122 internal nodes ]
ntaxa(aggregate_taxa(ps1,"Genus"))
## [1] 429
colnames(tax_table(ps1))
## [1] "Domain" "Phylum" "Class"  "Order"  "Family" "Genus"

4. Alpha diversity analyses

4.1. Loading data for plots

ps1.faeces <- readRDS("./phyobjects/ps1.faeces.rds") #all faecal samples

# this line is used to make sure the order of days is chronological:
ps1.faeces@sam_data$Day_of_study_factor <- factor(ps1.faeces@sam_data$Day_of_study_factor, levels = c("Day_4", "Day_8", "Day_14", "Day_26", "Day_27", "Day_35", "Day_43", "Day_44", "Day_59", "Day_69", "Day_70")) 
# this line is used to make sure pre-weaning is followed by post-weaning:
ps1.faeces@sam_data$Pre_or_post_weaning <- factor(ps1.faeces@sam_data$Pre_or_post_weaning, levels = c("Pre-weaning", "Post-weaning")) 

ps1.digesta <- readRDS("./phyobjects/ps1.digesta.rds") #all digesta samples
# this line is used to reorder segments according order luminal content passages through the GI tract of the animals:
ps1.digesta@sam_data$Origin <- factor(ps1.digesta@sam_data$Origin, levels = c("jejunum_digesta", "ileum_digesta", "caecum_digesta")) 
print(ps1.digesta) #contains 1608 taxa in 134 samples
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 1626 taxa and 134 samples ]
## sample_data() Sample Data:       [ 134 samples by 171 sample variables ]
## tax_table()   Taxonomy Table:    [ 1626 taxa by 6 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 1626 tips and 1625 internal nodes ]
ps1.EcN <- readRDS("./phyobjects/ps1.EcN.rds") #all faecal and digestal samples of the current study

# this line is used to combine faeces and digesta in an orderly fashion for use in the alpha-diversity plots:
ps1.EcN@sam_data$Origin_Day <- factor(ps1.EcN@sam_data$Origin_Day, levels = c("faeces_Day_4", "faeces_Day_8", "faeces_Day_14", "faeces_Day_26", "faeces_Day_35", "faeces_Day_43", "faeces_Day_59", "faeces_Day_69", "jejunum_digesta_Day_27", "jejunum_digesta_Day_44", "jejunum_digesta_Day_70", "ileum_digesta_Day_27", "ileum_digesta_Day_44", "ileum_digesta_Day_70", "caecum_digesta_Day_27", "caecum_digesta_Day_44", "caecum_digesta_Day_70")) 

ps1.proteobacteria <- readRDS("./phyobjects/ps1.EcN.rds") 

ps1.proteobacteria <- subset_taxa(ps1.proteobacteria, Phylum == "Proteobacteria")

ps1.proteobacteria <- subset_samples(ps1.proteobacteria, Day_of_study_factor %in% c("Day_4", "Day_8", "Day_14", "Day_26", "Day_27"))

ps1.proteobacteria <- subset_samples(ps1.proteobacteria, Origin %in% c("faeces"))

# this line is used to make sure the order of days is chronological:
ps1.proteobacteria@sam_data$Day_of_study_factor <- factor(ps1.proteobacteria@sam_data$Day_of_study_factor, levels = c("Day_4", "Day_8", "Day_14", "Day_26", "Day_27", "Day_35", "Day_43", "Day_44", "Day_59", "Day_69", "Day_70")) 
# this line is used to make sure pre-weaning is followed by post-weaning:
ps1.proteobacteria@sam_data$Pre_or_post_weaning <- factor(ps1.proteobacteria@sam_data$Pre_or_post_weaning, levels = c("Pre-weaning", "Post-weaning")) 

ps1.proteobacteria@sam_data$Origin_Day <- factor(ps1.proteobacteria@sam_data$Origin_Day, levels = c("faeces_Day_4", "faeces_Day_8", "faeces_Day_14", "faeces_Day_26", "faeces_Day_35", "faeces_Day_43", "faeces_Day_59", "faeces_Day_69", "jejunum_digesta_Day_27", "jejunum_digesta_Day_44", "jejunum_digesta_Day_70", "ileum_digesta_Day_27", "ileum_digesta_Day_44", "ileum_digesta_Day_70", "caecum_digesta_Day_27", "caecum_digesta_Day_44", "caecum_digesta_Day_70")) 

4.2. Plotting

p <- plot_richness(ps1.faeces, "Day_of_study_factor", color = "Treatment", measures = "Shannon")
## Warning in estimate_richness(physeq, split = TRUE, measures = measures): The data you have provided does not have
## any singletons. This is highly suspicious. Results of richness
## estimates (for example) are probably unreliable, or wrong, if you have already
## trimmed low-abundance taxa from the data.
## 
## We recommended that you find the un-trimmed data and retry.
p <- p + geom_boxplot(outlier.shape = NA, aes(fill = "Day_of_study")) + scale_fill_manual(values = c("white", "#5F7FC7", "orange","#DA5724", "#508578"))
p <- p + geom_point(position=position_jitterdodge(), alpha=1, size=1.5) #+ theme_bw()
p$layers <- p$layers[-1]
print (p) #this plot is created for illustrative purposes, and is not saved or used in the manuscript

p <- plot_richness(ps1.digesta, "Day_of_study_factor", color = "Treatment", measures = "Shannon")
## Warning in estimate_richness(physeq, split = TRUE, measures = measures): The data you have provided does not have
## any singletons. This is highly suspicious. Results of richness
## estimates (for example) are probably unreliable, or wrong, if you have already
## trimmed low-abundance taxa from the data.
## 
## We recommended that you find the un-trimmed data and retry.
p <- p + geom_boxplot(outlier.shape = NA, aes(fill = "Day_of_study")) + scale_fill_manual(values = c("white", "#5F7FC7", "orange","#DA5724", "#508578"))
p <- p + geom_point(position=position_jitterdodge(), alpha=1, size=1.5)
p <- p + facet_wrap("Origin")
p$layers <- p$layers[-1]
print (p) #this plot is created for illustrative purposes, and is not saved or used in the manuscript

p <- plot_richness(ps1.EcN, "Origin_Day", color = "Treatment", measures = "InvSimpson")
## Warning in estimate_richness(physeq, split = TRUE, measures = measures): The data you have provided does not have
## any singletons. This is highly suspicious. Results of richness
## estimates (for example) are probably unreliable, or wrong, if you have already
## trimmed low-abundance taxa from the data.
## 
## We recommended that you find the un-trimmed data and retry.
p <- p + geom_boxplot(outlier.shape = NA, aes(fill = "Origin_Day")) + scale_fill_manual(values = c("white", "#5F7FC7", "orange","#DA5724", "#508578"))
p <- p + geom_point(position=position_jitterdodge(), alpha = .9, size=2)
p$layers <- p$layers[-1]
#The final figure can be found under panel A in Figure 2 of the manuscript
print(p)

ggsave("./figures/alpha_diversity_faeces_and_digesta_inv_simpson4_no_theme_h7_w11_size2_original.pdf", height = 7, width = 11)

# following figures combine the faeces and digesta alpha diversities
p <- plot_richness(ps1.EcN, "Origin_Day", color = "Treatment", measures = "Shannon")
## Warning in estimate_richness(physeq, split = TRUE, measures = measures): The data you have provided does not have
## any singletons. This is highly suspicious. Results of richness
## estimates (for example) are probably unreliable, or wrong, if you have already
## trimmed low-abundance taxa from the data.
## 
## We recommended that you find the un-trimmed data and retry.
p <- p + geom_boxplot(outlier.shape = NA, aes(fill = "Origin_Day")) + scale_fill_manual(values = c("white", "#5F7FC7", "orange","#DA5724", "#508578"))
p <- p + geom_point(position=position_jitterdodge(), alpha = .9, size=2)
p$layers <- p$layers[-1]
#The final figure can be found under panel B in Figure 2 of the manuscript
print(p)

ggsave("./figures/alpha_diversity_faeces_and_digesta_shannon4_no_theme_h7_w11_size2_original.pdf", height = 7, width = 11)

p <- plot_richness(ps1.proteobacteria, "Origin_Day", color = "Treatment", measures = "Observed") + geom_boxplot(outlier.shape = NA, aes(fill = "Origin_Day")) + scale_fill_manual(values = c("white", "#5F7FC7", "orange","#DA5724", "#508578")) + geom_point(shape = 21, aes(alpha = sqrt(EcN_NGS_relative)), position=position_jitterdodge(seed = 456), size=0.5, stroke = 2) + geom_point(position=position_jitterdodge(seed = 456), size = 0.5)     
## Warning in estimate_richness(physeq, split = TRUE, measures = measures): The data you have provided does not have
## any singletons. This is highly suspicious. Results of richness
## estimates (for example) are probably unreliable, or wrong, if you have already
## trimmed low-abundance taxa from the data.
## 
## We recommended that you find the un-trimmed data and retry.
#p <- p + geom_point(position=position_jitterdodge(), alpha = .9, size=2)
p$layers <- p$layers[-1]
#The final figure can be found under panel B in Figure 2 of the manuscript
print(p)

ggsave("./figures/alpha_diversity_faeces_observed_proteobacteria.pdf", height = 7, width = 11)

4.3. Loading data for LME Models

Loading data for LME (Linear Mixed-Effects models)

ps1 <- readRDS("./phyobjects/ps1.EcN.rds")

ps1@sam_data$Day_of_study_factor <- factor(ps1@sam_data$Day_of_study_factor, levels = c("Day_4", "Day_8", "Day_14", "Day_26", "Day_27", "Day_35", "Day_43", "Day_44", "Day_59", "Day_69", "Day_70"))
ps1@sam_data$Pre_or_post_weaning <- factor(ps1@sam_data$Pre_or_post_weaning, levels = c("Pre_weaning", "Post_weaning"))

ps1.fcs <- subset_samples(ps1, Origin %in% c("faeces"))
ps1.fcs.pre <- subset_samples(ps1.fcs, Pre_or_post_weaning %in% c("Pre_weaning"))
ps1.fcs.pre.proteobacteria <- subset_taxa(ps1.fcs.pre, Phylum == "Proteobacteria")

ps1.fcs <- prune_taxa(taxa_sums(otu_table(ps1.fcs))>0, ps1.fcs)
ps1.fcs.pre <- prune_taxa(taxa_sums(otu_table(ps1.fcs.pre))>0, ps1.fcs.pre)
ps1.fcs.pre.proteobacteria <- prune_taxa(taxa_sums(otu_table(ps1.fcs.pre.proteobacteria))>0, ps1.fcs.pre.proteobacteria)

ps1.fcs.r <- microbiome::transform(ps1.fcs, "compositional")
ps1.fcs.pre.proteobacteria.r <- microbiome::transform(ps1.fcs.pre.proteobacteria, "compositional")
ps1.fcs.pre.r <- microbiome::transform(ps1.fcs.pre, "compositional")

# input for alpha diversity
ps1.otu <- as.data.frame(ps1.fcs.r@otu_table)
ps1.tree <- ps1.fcs.r@phy_tree

ps1.pre.otu <- as.data.frame(ps1.fcs.pre.r@otu_table)
ps1.pre.tree <- ps1.fcs.pre.r@phy_tree

ps1.pre.proteobacteria.otu <- as.data.frame(ps1.fcs.pre.proteobacteria.r@otu_table)
ps1.pre.proteobacteria.tree <- ps1.fcs.pre.proteobacteria.r@phy_tree

4.4. Alpha diversity - all samples

# calculate Shannon and InvSimpson alpha diversity and add metadata
ad.df <- data.frame("Description" = colnames(ps1.otu))

ad.df$Shannon <- estimate_richness(ps1.fcs)$Shannon
## Warning in estimate_richness(ps1.fcs): The data you have provided does not have
## any singletons. This is highly suspicious. Results of richness
## estimates (for example) are probably unreliable, or wrong, if you have already
## trimmed low-abundance taxa from the data.
## 
## We recommended that you find the un-trimmed data and retry.
ad.df$InvSimpson <- estimate_richness(ps1.fcs)$InvSimpson
## Warning in estimate_richness(ps1.fcs): The data you have provided does not have
## any singletons. This is highly suspicious. Results of richness
## estimates (for example) are probably unreliable, or wrong, if you have already
## trimmed low-abundance taxa from the data.
## 
## We recommended that you find the un-trimmed data and retry.
ad.df <- merge(meta(ps1.fcs.r), ad.df, by = "Description", all.x=T)

ad.df <- ad.df

4.5. LME on all faecal samples

# mixed model: random term and variance structure
lme.pd1 <- lme(Shannon ~ Day_of_study_factor * Treatment,
              data = ad.df, method = "REML",
              random = ~ 1 | Ear_tag)

lme.pd2 <- lme(InvSimpson ~ Day_of_study_factor * Treatment,
              data = ad.df, method = "REML",
              random = ~ 1 | Ear_tag)

# residual plots to visually inspect normality assumption
plot(resid(lme.pd1, method= "pearson")~ad.df$Day_of_study_factor); abline(0,0)

hist(resid(lme.pd1, method= "pearson"), breaks=30, col="grey")

qqnorm(lme.pd1, ∼ranef (.))

qqnorm(lme.pd1)

plot(resid(lme.pd2, method= "pearson")~ad.df$Day_of_study_factor); abline(0,0)

hist(resid(lme.pd2, method= "pearson"), breaks=30, col="grey")

qqnorm(lme.pd2, ∼ranef (.))

qqnorm(lme.pd2)

# model output
aov.pd.ls1 <- anova(lme.pd1) #Shannon diversity
aov.pd.ls1 #no significant difference on all faecal samples between treatment groups
##                               numDF denDF  F-value p-value
## (Intercept)                       1   224 33750.89  <.0001
## Day_of_study_factor               7   224    27.89  <.0001
## Treatment                         1    46     0.06  0.8071
## Day_of_study_factor:Treatment     7   224     2.50  0.0173
aov.pd.ls2 <- anova(lme.pd2) #InvSimpson diversity
aov.pd.ls2 #no significant difference on all faecal samples between treatment groups
##                               numDF denDF   F-value p-value
## (Intercept)                       1   224 1474.2569  <.0001
## Day_of_study_factor               7   224   12.5348  <.0001
## Treatment                         1    46    0.4290  0.5157
## Day_of_study_factor:Treatment     7   224    2.0856  0.0461
# test differences between timepoints - Shannon diversity
res.aov2 <- aov(Shannon ~ Day_of_study_factor, data = ad.df)
summary(res.aov2)
##                      Df Sum Sq Mean Sq F value Pr(>F)    
## Day_of_study_factor   7  22.42   3.203   26.96 <2e-16 ***
## Residuals           278  33.02   0.119                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(res.aov2, which = "Day_of_study_factor")
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Shannon ~ Day_of_study_factor, data = ad.df)
## 
## $Day_of_study_factor
##                      diff         lwr        upr     p adj
## Day_8-Day_4    0.31976503  0.10489282 0.53463724 0.0002185
## Day_14-Day_4   0.40313810  0.18712597 0.61915022 0.0000008
## Day_26-Day_4   0.55341430  0.33740218 0.76942643 0.0000000
## Day_35-Day_4   0.90604172  0.66580728 1.14627615 0.0000000
## Day_43-Day_4   0.77912256  0.53888812 1.01935699 0.0000000
## Day_59-Day_4   0.60190367  0.29802848 0.90577887 0.0000001
## Day_69-Day_4   0.69011902  0.38624382 0.99399421 0.0000000
## Day_14-Day_8   0.08337307 -0.13263906 0.29938519 0.9374044
## Day_26-Day_8   0.23364927  0.01763715 0.44966140 0.0236782
## Day_35-Day_8   0.58627669  0.34604225 0.82651112 0.0000000
## Day_43-Day_8   0.45935753  0.21912309 0.69959196 0.0000004
## Day_59-Day_8   0.28213864 -0.02173655 0.58601384 0.0905096
## Day_69-Day_8   0.37035398  0.06647879 0.67422918 0.0057767
## Day_26-Day_14  0.15027621 -0.06686985 0.36742226 0.4087047
## Day_35-Day_14  0.50290362  0.26164908 0.74415816 0.0000000
## Day_43-Day_14  0.37598446  0.13472992 0.61723900 0.0000843
## Day_59-Day_14  0.19876558 -0.10591672 0.50344788 0.4888469
## Day_69-Day_14  0.28698092 -0.01770138 0.59166322 0.0813335
## Day_35-Day_26  0.35262741  0.11137287 0.59388196 0.0003100
## Day_43-Day_26  0.22570825 -0.01554629 0.46696280 0.0855660
## Day_59-Day_26  0.04848937 -0.25619293 0.35317167 0.9997143
## Day_69-Day_26  0.13670471 -0.16797759 0.44138701 0.8699735
## Day_43-Day_35 -0.12691916 -0.39008280 0.13624448 0.8212386
## Day_59-Day_35 -0.30413805 -0.62644636 0.01817027 0.0802139
## Day_69-Day_35 -0.21592270 -0.53823102 0.10638561 0.4528516
## Day_59-Day_43 -0.17721889 -0.49952720 0.14508943 0.7006721
## Day_69-Day_43 -0.08900354 -0.41131186 0.23330477 0.9903988
## Day_69-Day_59  0.08821534 -0.28395424 0.46038493 0.9962323
# test differences between timepoints - InvSimpson diversity
res.aov2 <- aov(InvSimpson ~ Day_of_study_factor, data = ad.df)
summary(res.aov2)
##                      Df Sum Sq Mean Sq F value   Pr(>F)    
## Day_of_study_factor   7   8370  1195.8   12.23 1.27e-13 ***
## Residuals           278  27190    97.8                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(res.aov2, which = "Day_of_study_factor")
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = InvSimpson ~ Day_of_study_factor, data = ad.df)
## 
## $Day_of_study_factor
##                      diff          lwr          upr     p adj
## Day_8-Day_4    3.33470622  -2.83085321  9.500265648 0.7181115
## Day_14-Day_4   6.36693246   0.16866427 12.565200659 0.0392907
## Day_26-Day_4  10.76677344   4.56850525 16.965041637 0.0000063
## Day_35-Day_4  17.96301526  11.06971026 24.856320261 0.0000000
## Day_43-Day_4  12.31179973   5.41849473 19.205104730 0.0000030
## Day_59-Day_4   8.70841903  -0.01099873 17.427836796 0.0505517
## Day_69-Day_4   8.73454908   0.01513131 17.453966839 0.0492494
## Day_14-Day_8   3.03222624  -3.16604195  9.230494438 0.8101714
## Day_26-Day_8   7.43206722   1.23379903 13.630335417 0.0071567
## Day_35-Day_8  14.62830904   7.73500404 21.521614040 0.0000000
## Day_43-Day_8   8.97709351   2.08378851 15.870398510 0.0022395
## Day_59-Day_8   5.37371281  -3.34570495 14.093130576 0.5642845
## Day_69-Day_8   5.39984286  -3.31957491 14.119260619 0.5579884
## Day_26-Day_14  4.39984098  -1.83096428 10.630646239 0.3814915
## Day_35-Day_14 11.59608280   4.67350673 18.518658864 0.0000160
## Day_43-Day_14  5.94486727  -0.97770880 12.867443333 0.1521748
## Day_59-Day_14  2.34148657  -6.40109038 11.084063518 0.9920134
## Day_69-Day_14  2.36761661  -6.37496033 11.110193561 0.9914599
## Day_35-Day_26  7.19624182   0.27366575 14.118817885 0.0351338
## Day_43-Day_26  1.54502629  -5.37754978  8.467602354 0.9974194
## Day_59-Day_26 -2.05835441 -10.80093136  6.684222539 0.9963876
## Day_69-Day_26 -2.03222437 -10.77480131  6.710352582 0.9966654
## Day_43-Day_35 -5.65121553 -13.20245282  1.900021759 0.3052277
## Day_59-Day_35 -9.25459623 -18.50293537 -0.006257085 0.0497062
## Day_69-Day_35 -9.22846618 -18.47680533  0.019872958 0.0509429
## Day_59-Day_43 -3.60338070 -12.85171984  5.644958445 0.9343107
## Day_69-Day_43 -3.57725065 -12.82558980  5.671088488 0.9367079
## Day_69-Day_59  0.02613004 -10.65293214 10.705192230 1.0000000

4.6. Alpha diversity - pre-weaning

# calculate Shannon and InvSimpson alpha diversity and add metadata
ad.df.pre <- data.frame("Description" = colnames(ps1.pre.otu))

ad.df.pre$Shannon <- estimate_richness(ps1.fcs.pre)$Shannon
## Warning in estimate_richness(ps1.fcs.pre): The data you have provided does not have
## any singletons. This is highly suspicious. Results of richness
## estimates (for example) are probably unreliable, or wrong, if you have already
## trimmed low-abundance taxa from the data.
## 
## We recommended that you find the un-trimmed data and retry.
ad.df.pre$InvSimpson <- estimate_richness(ps1.fcs.pre)$InvSimpson
## Warning in estimate_richness(ps1.fcs.pre): The data you have provided does not have
## any singletons. This is highly suspicious. Results of richness
## estimates (for example) are probably unreliable, or wrong, if you have already
## trimmed low-abundance taxa from the data.
## 
## We recommended that you find the un-trimmed data and retry.
ad.df.pre <- merge(meta(ps1.fcs.pre.r), ad.df.pre, by = "Description", all.x=T)

ad.df.pre <- ad.df.pre

4.7. LME pre-weaning faecal samples

# mixed model: random term and variance structure
lme.pre.pd1 <- lme(Shannon ~ Day_of_study_factor * Treatment,
              data = ad.df.pre, method = "REML",
              random = ~ 1 | Ear_tag)

lme.pre.pd2 <- lme(InvSimpson ~ Day_of_study_factor * Treatment,
              data = ad.df.pre, method = "REML",
              random = ~ 1 | Ear_tag)


# residual plots to visually inspect normality assumption
plot(resid(lme.pre.pd1, method= "pearson")~ad.df.pre$Day_of_study_factor); abline(0,0)

hist(resid(lme.pre.pd1, method= "pearson"), breaks=30, col="grey")

qqnorm(lme.pre.pd1, ∼ranef (.))

qqnorm(lme.pre.pd1)

plot(resid(lme.pre.pd2, method= "pearson")~ad.df.pre$Day_of_study_factor); abline(0,0)

hist(resid(lme.pre.pd2, method= "pearson"), breaks=30, col="grey")

qqnorm(lme.pre.pd2, ∼ranef (.))

qqnorm(lme.pre.pd2)

# model output
aov.pd.ls1 <- anova(lme.pre.pd1)
aov.pd.ls1
##                               numDF denDF   F-value p-value
## (Intercept)                       1   136 19714.580  <.0001
## Day_of_study_factor               3   136    24.048  <.0001
## Treatment                         1    46     1.739  0.1938
## Day_of_study_factor:Treatment     3   136     2.317  0.0784
aov.pd.ls2 <- anova(lme.pre.pd2)
aov.pd.ls2
##                               numDF denDF   F-value p-value
## (Intercept)                       1   136 1222.0842  <.0001
## Day_of_study_factor               3   136   17.9269  <.0001
## Treatment                         1    46    2.8908  0.0958
## Day_of_study_factor:Treatment     3   136    2.0146  0.1148

4.8. Alpha diversity - Proteobacteria

# calculate Shannon and InvSimpson alpha diversity and add metadata
ad.df.pre.proteobacteria <- data.frame("Description" = colnames(ps1.pre.proteobacteria.otu))

ad.df.pre.proteobacteria$Observed <- estimate_richness(ps1.fcs.pre.proteobacteria, measures = "Observed")$Observed
## Warning in estimate_richness(ps1.fcs.pre.proteobacteria, measures = "Observed"): The data you have provided does not have
## any singletons. This is highly suspicious. Results of richness
## estimates (for example) are probably unreliable, or wrong, if you have already
## trimmed low-abundance taxa from the data.
## 
## We recommended that you find the un-trimmed data and retry.
ad.df.pre.proteobacteria$Shannon <- estimate_richness(ps1.fcs.pre.proteobacteria, measures = "Shannon")$Shannon
## Warning in estimate_richness(ps1.fcs.pre.proteobacteria, measures = "Shannon"): The data you have provided does not have
## any singletons. This is highly suspicious. Results of richness
## estimates (for example) are probably unreliable, or wrong, if you have already
## trimmed low-abundance taxa from the data.
## 
## We recommended that you find the un-trimmed data and retry.
ad.df.pre.proteobacteria <- merge(meta(ps1.fcs.pre.proteobacteria.r), ad.df.pre.proteobacteria, by = "Description", all.x=T)

ad.df.pre.proteobacteria <- ad.df.pre.proteobacteria

4.9. LME Proteobacteria

# mixed model: random term and variance structure
lme.pre.proteobacteria.pd1 <- lme(Observed ~ Day_of_study_factor * Treatment,
              data = ad.df.pre.proteobacteria, method = "REML",
              random = ~ 1 | Ear_tag)

lme.pre.proteobacteria.pd2 <- lme(Shannon ~ Day_of_study_factor * Treatment,
              data = ad.df.pre.proteobacteria, method = "REML",
              random = ~ 1 | Ear_tag)


# residual plots to visually inspect normality assumption
plot(resid(lme.pre.proteobacteria.pd1, method= "pearson")~ad.df.pre.proteobacteria$Day_of_study_factor); abline(0,0)

hist(resid(lme.pre.proteobacteria.pd1, method= "pearson"), breaks=30, col="grey")

qqnorm(lme.pre.proteobacteria.pd1, ∼ranef (.))

qqnorm(lme.pre.proteobacteria.pd1)

plot(resid(lme.pre.proteobacteria.pd2, method= "pearson")~ad.df.pre.proteobacteria$Day_of_study_factor); abline(0,0)

hist(resid(lme.pre.proteobacteria.pd2, method= "pearson"), breaks=30, col="grey")

qqnorm(lme.pre.proteobacteria.pd2, ∼ranef (.))

qqnorm(lme.pre.proteobacteria.pd2)

# model output
aov.pd.ls1 <- anova(lme.pre.proteobacteria.pd1)
aov.pd.ls1
##                               numDF denDF  F-value p-value
## (Intercept)                       1   136 353.9852  <.0001
## Day_of_study_factor               3   136  12.0719  <.0001
## Treatment                         1    46   4.8152  0.0333
## Day_of_study_factor:Treatment     3   136   2.0137  0.1149
aov.pd.ls2 <- anova(lme.pre.proteobacteria.pd2)
aov.pd.ls2
##                               numDF denDF  F-value p-value
## (Intercept)                       1   136 612.6292  <.0001
## Day_of_study_factor               3   136   3.1742  0.0263
## Treatment                         1    46   0.1200  0.7306
## Day_of_study_factor:Treatment     3   136   0.5927  0.6208

5. Beta-diversity analysis

5.1. Input data PCoA plot

Input data Principal Coordinates Analysis plot

ps1 <- readRDS("./phyobjects/ps1.EcN.rds")

ps1@sam_data$Day_of_study_factor <- factor(ps1@sam_data$Day_of_study_factor, levels = c("Day_4", "Day_8", "Day_14", "Day_26", "Day_27", "Day_35", "Day_43", "Day_44", "Day_59", "Day_69", "Day_70"))
ps1@sam_data$Pre_or_post_weaning <- factor(ps1@sam_data$Pre_or_post_weaning, levels = c("Pre_weaning", "Post_weaning"))

ps1 <- subset_samples(ps1, Origin %in% c("faeces")) 
ps1 <- prune_taxa(taxa_sums(ps1) > 0, ps1)
ps1 <- microbiome::transform(ps1, "compositional")

5.2. Principal Coordinates Analysis plot

Ordination plot based on Weighted Unifrac

set.seed(49275)
ordu.wt.uni = ordinate(ps1, "PCoA", "bray")

wt.unifrac <- plot_ordination(ps1, ordu.wt.uni, color="Day_of_study_factor", shape="Treatment")
wt.unifrac <- wt.unifrac + scale_color_viridis(discrete = TRUE, option = "C")+ scale_fill_viridis(discrete = TRUE) +
ggtitle("Ordination plot - PCoA, Weighted UniFrac faecal samples") + geom_point(size = 3) + scale_shape_manual(values=c(15,17))
wt.unifrac <- wt.unifrac + 
  stat_ellipse(type = "norm", linetype = 5) +
  theme_bw()
print(wt.unifrac)

pdf(file = "./figures/pcoa_bray_faecal_samples_shape_15_17_shape_treatment.pdf", height = 10, width = 15)
wt.unifrac
dev.off() #The final figure can be found as Figure 4 of the manuscript
## png 
##   2

5.3. Load beta-diversity data

ps1 <- readRDS("./phyobjects/ps1.EcN.rds")

ps1@sam_data$Day_of_study_factor <- factor(ps1@sam_data$Day_of_study_factor, levels = c("Day_4", "Day_8", "Day_14", "Day_26", "Day_27", "Day_35", "Day_43", "Day_44", "Day_59", "Day_69", "Day_70"))
ps1@sam_data$Pre_or_post_weaning <- factor(ps1@sam_data$Pre_or_post_weaning, levels = c("Pre_weaning", "Post_weaning"))

# subsetting
ps1.fcs <- subset_samples(ps1, Origin %in% c("faeces"))
ps1.fcs.pre <- subset_samples(ps1.fcs, Pre_or_post_weaning %in% c("Pre_weaning"))
ps1.fcs.post <- subset_samples(ps1.fcs, Pre_or_post_weaning %in% c("Post_weaning"))

ps1.fcs.r <- microbiome::transform(ps1.fcs, "compositional")
ps1.fcs.pre.r <- microbiome::transform(ps1.fcs.pre, "compositional")
ps1.fcs.post.r <- microbiome::transform(ps1.fcs.post, "compositional")

# Conversions of data for Adonis tests
fcs.otu <- abundances(ps1.fcs.r)
fcs.meta <- meta(ps1.fcs.r)

fcs.otu.pre <- abundances(ps1.fcs.pre.r)
fcs.meta.pre <- meta(ps1.fcs.pre.r)

fcs.otu.post <- abundances(ps1.fcs.post.r)
fcs.meta.post <- meta(ps1.fcs.post.r)

5.4. PERMANOVA significance test

PERMANOVA significance test for group-level differences

set.seed(343)
permanova <- adonis(t(fcs.otu) ~ Treatment + Day_of_study_factor,
                    data = fcs.meta, permutations=9999, method = "bray")

print(as.data.frame(permanova$aov.tab)) #statistically significant differences in beta-diversity between treatment groups when taking into account all faecal timepoints (p = 0.0149)
##                      Df  SumsOfSqs   MeanSqs   F.Model          R2 Pr(>F)
## Treatment             1  0.4799172 0.4799172  2.056624 0.004823338 0.0149
## Day_of_study_factor   7 34.3805735 4.9115105 21.047653 0.345536953 0.0001
## Residuals           277 64.6384869 0.2333519        NA 0.649639709     NA
## Total               285 99.4989776        NA        NA 1.000000000     NA
set.seed(456)
permanova.pre <- adonis(t(fcs.otu.pre) ~ Treatment + Day_of_study_factor,
                    data = fcs.meta.pre, permutations=9999, method = "bray")

print(as.data.frame(permanova.pre$aov.tab)) #statistically significant differences in beta-diversity between treatment groups when taking into account pre-weaning faecal timepoints (p = 0.005)
##                      Df  SumsOfSqs   MeanSqs   F.Model         R2 Pr(>F)
## Treatment             1  0.6112093 0.6112093  2.445152 0.01005824  5e-03
## Day_of_study_factor   3 13.9117835 4.6372612 18.551438 0.22893638  1e-04
## Residuals           185 46.2440344 0.2499678        NA 0.76100538     NA
## Total               189 60.7670272        NA        NA 1.00000000     NA
set.seed(456)
permanova.post <- adonis(t(fcs.otu.post) ~ Treatment + Day_of_study_factor,
                    data = fcs.meta.post, permutations=9999, method = "bray")

print(as.data.frame(permanova.post$aov.tab)) #no statistically significant differences in beta-diversity between treatment groups when taking into account post-weaning faecal timepoints (p=0.0756)
##                     Df SumsOfSqs  MeanSqs  F.Model         R2 Pr(>F)
## Treatment            1  0.302365 0.302365 1.532204 0.01389173 0.0756
## Day_of_study_factor  3  3.505529 1.168510 5.921303 0.16105654 0.0001
## Residuals           91 17.957937 0.197340       NA 0.82505173     NA
## Total               95 21.765831       NA       NA 1.00000000     NA

5.5. Checking homogeneity assumption

# all faecal samples - dispersion between treatment groups
dist <- vegdist(t(fcs.otu), method="bray")

anova(betadisper(dist, fcs.meta$Treatment)) #statistical significant differences in dispersion between treatment groups, although really low sum of squares
## Analysis of Variance Table
## 
## Response: Distances
##            Df  Sum Sq   Mean Sq F value  Pr(>F)  
## Groups      1 0.00877 0.0087713  4.6162 0.03252 *
## Residuals 284 0.53963 0.0019001                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
dispersion <- betadisper(dist, group= fcs.meta$Treatment)

plot(dispersion, hull=FALSE, ellipse = TRUE) #plot shows there are not really differences in dispersion

# dispersion pre- vs post-weaning
anova(betadisper(dist, fcs.meta$Pre_or_post_weaning)) #statistical significant differences in dispersion between pre- and post-weaning samples, and high sum of squares
## Analysis of Variance Table
## 
## Response: Distances
##            Df  Sum Sq Mean Sq F value    Pr(>F)    
## Groups      1 0.55784 0.55784  137.97 < 2.2e-16 ***
## Residuals 284 1.14825 0.00404                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
dispersion_weaning <- betadisper(dist, group= fcs.meta$Pre_or_post_weaning) 

plot(dispersion_weaning, hull=FALSE, ellipse = TRUE)

# dispersion in each time-point
anova(betadisper(dist, fcs.meta$Day_of_study_factor)) #statistical significant differences in dispersion between time points
## Analysis of Variance Table
## 
## Response: Distances
##            Df  Sum Sq  Mean Sq F value    Pr(>F)    
## Groups      7 0.37805 0.054007   9.122 3.804e-10 ***
## Residuals 278 1.64588 0.005920                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
dispersion_day <- betadisper(dist, group = fcs.meta$Day_of_study_factor)

plot(dispersion_day, hull=FALSE, ellipse = TRUE)

# pre-weaning faecal samples - dispersion between treatment groups
dist.pre <- vegdist(t(fcs.otu.pre), method="bray")

anova(betadisper(dist.pre, fcs.meta.pre$Treatment)) #statistical significant differences in dispersion between groups in pre-weaning faecal samples, although really low sum of squares
## Analysis of Variance Table
## 
## Response: Distances
##            Df  Sum Sq   Mean Sq F value  Pr(>F)  
## Groups      1 0.01692 0.0169151  6.4247 0.01207 *
## Residuals 188 0.49497 0.0026328                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
dispersion.pre <- betadisper(dist.pre, group= fcs.meta.pre$Treatment)

plot(dispersion.pre, hull=FALSE, ellipse = TRUE)

# pre-weaning faecal samples - dispersion in each time-point
anova(betadisper(dist.pre, fcs.meta.pre$Day_of_study_factor)) #no statistical significant differences in dispersion between timepoints in pre-weaning faecal samples
## Analysis of Variance Table
## 
## Response: Distances
##            Df  Sum Sq   Mean Sq F value Pr(>F)
## Groups      3 0.02694 0.0089792  1.4967 0.2169
## Residuals 186 1.11587 0.0059993
dispersion.pre_day <- betadisper(dist.pre, group = fcs.meta.pre$Day_of_study_factor)

plot(dispersion.pre_day, hull=FALSE, ellipse = TRUE)

6. Abundance testing - pre-weaning

6.1. Load data

ps1.EcN <- readRDS("./phyobjects/ps1.EcN.rds")

ps1.EcN@sam_data$Day_of_study_factor <- factor(ps1.EcN@sam_data$Day_of_study_factor, levels = c("Day_4", "Day_8", "Day_14", "Day_26", "Day_27", "Day_35", "Day_43", "Day_44", "Day_59", "Day_69", "Day_70"))

ps1.EcN.g <- tax_glom(ps1.EcN, "Genus")
ps1.EcN.g.r <- microbiome::transform(ps1.EcN.g, "compositional")

ps1.EcN.g.r <- subset_samples(ps1.EcN.g.r, Origin %in% c("faeces"))
ps1.EcN.g.r <- subset_samples(ps1.EcN.g.r, Pre_or_post_weaning %in% c("Pre_weaning"))

ps1.EcN.g.r <- filter_taxa(ps1.EcN.g.r, function(x) sum(x > 0.001) > (0.30*length(x)), TRUE)

ps1.EcN.g.r #60 genera passed through the prevalence filter
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 60 taxa and 190 samples ]
## sample_data() Sample Data:       [ 190 samples by 171 sample variables ]
## tax_table()   Taxonomy Table:    [ 60 taxa by 6 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 60 tips and 59 internal nodes ]
# tax table with OTU column and best_hit
EcN.tax <- as.data.frame(tax_table(ps1.EcN.g.r))
EcN.tax$OTU <- rownames(EcN.tax)
tax_table(ps1.EcN.g.r) <- tax_table(as.matrix(EcN.tax))
ps1.EcN.g.bh <- format_to_besthit(ps1.EcN.g.r)
EcN.tax.bh <- as.data.frame(tax_table(ps1.EcN.g.bh))
colnames(EcN.tax.bh)[7] <- "OTU"

7.2. Prepare data

EcN.g.met <- meta(ps1.EcN.g.r)

EcN.g1 <- EcN.g.met[,c("Description","Ear_tag", "EcN_NGS_relative", "department_pre_weaning",
               "Treatment","Day_of_study_factor")]
# OTU table as dataframe:
EcN.g2 <- as.data.frame(t(otu_table(ps1.EcN.g.r)))
EcN.g2$Description <- rownames(EcN.g2)
# merge files
EcN.g3 <- merge(EcN.g1, EcN.g2, by = "Description")
colnames(EcN.g3)[7:ncol(EcN.g3)] <- paste("k__", sep = "", colnames(EcN.g3)[7:ncol(EcN.g3)])

7.3. Abundance testing

Abundance testing on pre-weaning faecal samples

# Results, warnings and messages have been inactivated for this markdown, as otherwise it will use too much space in the HTML output file.
tc.treatment <- taxa.compare(taxtab = EcN.g3, propmed.rel = "gamlss",
                        transform = "none", comvar = "Treatment",
                        adjustvar = c("Day_of_study_factor", "department_pre_weaning"),
                        personid = "Ear_tag", longitudinal = "yes",
                        p.adjust.method = "fdr")

tc.EcN05 <- subset(tc.treatment, pval.adjust.TreatmentEcN < .05)

write.csv(tc.treatment, file = "./output_data/tc.treatment_pre-w_genera_department_filter0.001_prev0.3.csv") 

tc.EcN05$id <- droplevels(as.factor(tc.EcN05$id))

EcN3.m <- reshape2::melt(EcN.g3)
EcN3.max <- ddply(EcN3.m, .(variable), summarise, max = max(value))
EcN3.max1 <- EcN3.max
EcN3.max1$variable <- droplevels(EcN3.max1$variable)
 
# subset
EcN.g4 <- subset(EcN3.m, variable %in% tc.EcN05$id &
                variable %in% EcN3.max1$variable)

# create vector of genus names (OTU codes) to include in plots
select.gen.EcN <- levels(droplevels(EcN.g4$variable)) # 6 genera
select.gen.EcN <- sub("k__", "", select.gen.EcN) # remove "k__"
select.gen.EcN

7.4. Editing output

tc.EcN.sub <- subset(tc.EcN05, id %in% EcN3.max1$variable)

# remove "k__" in id names
tc.EcN.sub$id <- as.factor(sub("k__", "", tc.EcN.sub$id))

# show tax names of output genera
EcN.g4.tax <- subset(EcN.tax.bh, OTU %in% select.gen.EcN) #significant genera

# create vectors of significant genera from comparisons
select.gen.EcN2 <- unique(c(as.character(select.gen.EcN)))

7.5. Prepare data for plotting

# filter for genera from first output series of GAMLSS tests (select.gen.EcN2).

# object 1: metadata
EcN.ls.met <- meta(ps1.EcN.g.r)

EcN.ls1 <- EcN.ls.met[,c("Description","Ear_tag", "EcN_NGS_relative", "department_pre_weaning", "Treatment","Day_of_study_factor")]

# object 2: OTU table
EcN.ls2 <- as.data.frame(t(otu_table(ps1.EcN.g.r)))
EcN.ls2$Description <- rownames(EcN.ls2)
# melt EcN.ls2
EcN.ls2.m <- reshape2::melt(EcN.ls2)
## Using Description as id variables
colnames(EcN.ls2.m) <- c("Description", "OTU", "abund")
# merge EcN.ls2, EcN.tax and EcN.ls1
EcN.ls3a <- base::merge(EcN.ls2.m, EcN.tax, by = "OTU")
EcN.ls3 <- base::merge(EcN.ls1, EcN.ls3a, by = "Description")

# subset to signif genera
EcN.ls3.max <- ddply(EcN.ls3, .(OTU), summarise, max = max(abund))
EcN.ls3.max$OTU <- droplevels(EcN.ls3.max$OTU)
EcN.ls3.s <- subset(EcN.ls3, OTU %in% select.gen.EcN2 & OTU %in%
                     EcN.ls3.max$OTU)
EcN.ls3.s$OTU <- droplevels(EcN.ls3.s$OTU)

7.6. Plot presets

theme4 <- theme_classic() + 
  theme(panel.grid.major = element_line(colour = "grey80"),
        panel.spacing = unit(.5,"lines"),
        panel.border = element_rect(color = "black", fill = NA, size = .5),
        strip.background = element_blank(),
        strip.placement = "outside",
        text = element_text(size=15),
        axis.text.x = element_text(hjust = .5, vjust = .5),
        plot.title = element_text(size = 12))
labs_abd <- as_labeller(c(
  "faeces" = "Faeces", "ileum_digesta" = "Ileum digesta", "jejunum_digesta" = " Jejunum digesta"))

7.7. Plotting loop

# for-loop
sig.plot <- list()
for(i in 1:nlevels(EcN.ls3.s$OTU)){
  a = levels(EcN.ls3.s$OTU)[i]
  B = EcN.ls3.s[EcN.ls3.s$OTU == a,]
  p = ggplot(B, aes(x = Day_of_study_factor, y = abund, color = Treatment)) +
    geom_boxplot(outlier.size = 0) +
    geom_point(aes(size = sqrt(EcN_NGS_relative), alpha = sqrt(EcN_NGS_relative)), position = position_jitterdodge(seed = 456)) +
    geom_point(size = 1, aes(fill = Treatment), colour= 'black', position = position_jitterdodge(seed = 456)) +
    labs(x="time (d)", y="rel. abundance") + scale_y_log10() +
    ggtitle(paste(B$Order,B$Family,B$Genus,B$OTU)) + theme4
  sig.plot[[i]] = p
}

# print plots
pdf("./figures/Plots_genus_sig_trimmed_compare_treatments_faeces_pre-w_padj.05_department_filter0.001_0.3_adj_y_axis_alpha_by_sqrt_ecn_rela_log10_y_axis_size_by_sqrt_ecn_rela_h8_w12.pdf", height = 8, width = 12)
for (i in 1:nlevels(EcN.ls3.s$OTU)) {
    print(sig.plot[[i]])
}
## Warning: Transformation introduced infinite values in continuous y-axis

## Warning: Transformation introduced infinite values in continuous y-axis

## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 123 rows containing non-finite values (stat_boxplot).
## Warning: Transformation introduced infinite values in continuous y-axis

## Warning: Transformation introduced infinite values in continuous y-axis

## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 127 rows containing non-finite values (stat_boxplot).
## Warning: Transformation introduced infinite values in continuous y-axis

## Warning: Transformation introduced infinite values in continuous y-axis

## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 132 rows containing non-finite values (stat_boxplot).
## Warning: Transformation introduced infinite values in continuous y-axis

## Warning: Transformation introduced infinite values in continuous y-axis

## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 129 rows containing non-finite values (stat_boxplot).
## Warning: Transformation introduced infinite values in continuous y-axis

## Warning: Transformation introduced infinite values in continuous y-axis

## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 112 rows containing non-finite values (stat_boxplot).
## Warning: Transformation introduced infinite values in continuous y-axis

## Warning: Transformation introduced infinite values in continuous y-axis

## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 88 rows containing non-finite values (stat_boxplot).
dev.off() #Resulting figures can be found under Supplementary Figure S11 of the manuscript
## png 
##   2

7. Abundance testing - post-weaning

7.1. Load data

ps1.EcN <- readRDS("./phyobjects/ps1.EcN.rds")

ps1.EcN@sam_data$Day_of_study_factor <- factor(ps1.EcN@sam_data$Day_of_study_factor, levels = c("Day_4", "Day_8", "Day_14", "Day_26", "Day_27", "Day_35", "Day_43", "Day_44", "Day_59", "Day_69", "Day_70"))

ps1.EcN.g <- tax_glom(ps1.EcN, "Genus")
ps1.EcN.g.r <- microbiome::transform(ps1.EcN.g, "compositional")

ps1.EcN.g.r <- subset_samples(ps1.EcN.g.r, Origin %in% c("faeces"))
ps1.EcN.g.r <- subset_samples(ps1.EcN.g.r, Pre_or_post_weaning %in% c("Post_weaning"))

ps1.EcN.g.r <- filter_taxa(ps1.EcN.g.r, function(x) sum(x > 0.001) > (0.30*length(x)), TRUE)

ps1.EcN.g.r # 87 genera passed through the prevalence filter
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 87 taxa and 96 samples ]
## sample_data() Sample Data:       [ 96 samples by 171 sample variables ]
## tax_table()   Taxonomy Table:    [ 87 taxa by 6 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 87 tips and 86 internal nodes ]
# tax table with OTU column and best_hit
EcN.tax <- as.data.frame(tax_table(ps1.EcN.g.r))
EcN.tax$OTU <- rownames(EcN.tax)
tax_table(ps1.EcN.g.r) <- tax_table(as.matrix(EcN.tax))
ps1.EcN.g.bh <- format_to_besthit(ps1.EcN.g.r)
EcN.tax.bh <- as.data.frame(tax_table(ps1.EcN.g.bh))
colnames(EcN.tax.bh)[7] <- "OTU"

7.2. Prepare data

EcN.g.met <- meta(ps1.EcN.g.r)

EcN.g1 <- EcN.g.met[,c("Description","Ear_tag", "EcN_NGS_relative", "department_pre_weaning",
               "Treatment","Day_of_study_factor")]
# OTU table as dataframe:
EcN.g2 <- as.data.frame(t(otu_table(ps1.EcN.g.r)))
EcN.g2$Description <- rownames(EcN.g2)
# merge files
EcN.g3 <- merge(EcN.g1, EcN.g2, by = "Description")
colnames(EcN.g3)[7:ncol(EcN.g3)] <- paste("k__", sep = "", colnames(EcN.g3)[7:ncol(EcN.g3)])

7.3. Abundance testing

Abundance testing on post-weaning faecal samples

# Results, warnings and messages have been inactivated for this markdown, as otherwise it will use too much space in the HTML output file.
tc.treatment <- taxa.compare(taxtab = EcN.g3, propmed.rel = "gamlss",
                        transform = "none", comvar = "Treatment",
                        adjustvar = c("Day_of_study_factor", "department_pre_weaning"),
                        personid = "Ear_tag", longitudinal = "yes",
                        p.adjust.method = "fdr")

tc.EcN05 <- subset(tc.treatment, pval.adjust.TreatmentEcN < .05)

write.csv(tc.treatment, file = "./output_data/tc.treatment_post-w_genera_department_filter0.001_prev0.3.csv") 

tc.EcN05$id <- droplevels(as.factor(tc.EcN05$id))

EcN3.m <- reshape2::melt(EcN.g3)
EcN3.max <- ddply(EcN3.m, .(variable), summarise, max = max(value))
EcN3.max1 <- EcN3.max
EcN3.max1$variable <- droplevels(EcN3.max1$variable)
 
# subset
EcN.g4 <- subset(EcN3.m, variable %in% tc.EcN05$id &
                variable %in% EcN3.max1$variable)

# create vector of genus names (OTU codes) to include in plots
select.gen.EcN <- levels(droplevels(EcN.g4$variable)) 
select.gen.EcN <- sub("k__", "", select.gen.EcN) # remove "k__"
select.gen.EcN

7.4. Editing output

tc.EcN.sub <- subset(tc.EcN05, id %in% EcN3.max1$variable)

# remove "k__" in id names
tc.EcN.sub$id <- as.factor(sub("k__", "", tc.EcN.sub$id))

# show tax names of output genera
EcN.g4.tax <- subset(EcN.tax.bh, OTU %in% select.gen.EcN) #significant genera

# create vectors of significant genera from comparisons
select.gen.EcN2 <- unique(c(as.character(select.gen.EcN)))

7.5. Prepare data for plotting

# filter for genera from first output series of GAMLSS tests (select.gen.EcN2).

# object 1: metadata
EcN.ls.met <- meta(ps1.EcN.g.r)

EcN.ls1 <- EcN.ls.met[,c("Description","Ear_tag", "EcN_NGS_relative", "department_pre_weaning", "Treatment","Day_of_study_factor")]

# object 2: OTU table
EcN.ls2 <- as.data.frame(t(otu_table(ps1.EcN.g.r)))
EcN.ls2$Description <- rownames(EcN.ls2)
# melt EcN.ls2
EcN.ls2.m <- reshape2::melt(EcN.ls2)
## Using Description as id variables
colnames(EcN.ls2.m) <- c("Description", "OTU", "abund")
# merge EcN.ls2, EcN.tax and EcN.ls1
EcN.ls3a <- base::merge(EcN.ls2.m, EcN.tax, by = "OTU")
EcN.ls3 <- base::merge(EcN.ls1, EcN.ls3a, by = "Description")

# subset to signif genera
EcN.ls3.max <- ddply(EcN.ls3, .(OTU), summarise, max = max(abund))

EcN.ls3.max$OTU <- droplevels(EcN.ls3.max$OTU)
EcN.ls3.s <- subset(EcN.ls3, OTU %in% select.gen.EcN2 & OTU %in%
                     EcN.ls3.max$OTU)
EcN.ls3.s$OTU <- droplevels(EcN.ls3.s$OTU)

7.6. Plot presets

theme4 <- theme_classic() + 
  theme(panel.grid.major = element_line(colour = "grey80"),
        panel.spacing = unit(.5,"lines"),
        panel.border = element_rect(color = "black", fill = NA, size = .5),
        strip.background = element_blank(),
        strip.placement = "outside",
        text = element_text(size=15),
        axis.text.x = element_text(hjust = .5, vjust = .5),
        plot.title = element_text(size = 12))
labs_abd <- as_labeller(c(
  "faeces" = "Faeces", "ileum_digesta" = "Ileum digesta", "jejunum_digesta" = " Jejunum digesta"))

7.7. Plotting loop

# for-loop
sig.plot <- list()
for(i in 1:nlevels(EcN.ls3.s$OTU)){
  a = levels(EcN.ls3.s$OTU)[i]
  B = EcN.ls3.s[EcN.ls3.s$OTU == a,]
  p = ggplot(B, aes(x = Day_of_study_factor, y = abund, color = Treatment)) +
    geom_boxplot(outlier.size = 0) +
    geom_point(aes(size = sqrt(EcN_NGS_relative), alpha = sqrt(EcN_NGS_relative)), position = position_jitterdodge(seed = 456)) +
    geom_point(size = 1, aes(fill = Treatment), colour= 'black', position = position_jitterdodge(seed = 456)) +
    labs(x="time (d)", y="rel. abundance") + scale_y_log10() +
    ggtitle(paste(B$Order,B$Family,B$Genus,B$OTU)) + theme4
  sig.plot[[i]] = p
}

# print plots
pdf("./figures/Plots_genus_sig_trimmed_compare_treatments_faeces_post-w_padj.05_department_filter0.001_0.3_adj_y_axis_alpha_by_sqrt_ecn_rela_log10_y_axis_size_by_sqrt_ecn_rela_h8_w12.pdf", height = 8, width = 12)
for (i in 1:nlevels(EcN.ls3.s$OTU)) {
    print(sig.plot[[i]])
}
## Warning: Transformation introduced infinite values in continuous y-axis

## Warning: Transformation introduced infinite values in continuous y-axis

## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 44 rows containing non-finite values (stat_boxplot).
## Warning: Transformation introduced infinite values in continuous y-axis

## Warning: Transformation introduced infinite values in continuous y-axis

## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 57 rows containing non-finite values (stat_boxplot).
dev.off() #Resulting figures can be found under Supplementary Figure S12 of the manuscript
## png 
##   2

8. Abundance testing - all samples

8.1. Load data

ps1.EcN <- readRDS("./phyobjects/ps1.EcN.rds")

ps1.EcN@sam_data$Day_of_study_factor <- factor(ps1.EcN@sam_data$Day_of_study_factor, levels = c("Day_4", "Day_8", "Day_14", "Day_26", "Day_27", "Day_35", "Day_43", "Day_44", "Day_59", "Day_69", "Day_70"))

ps1.EcN.g <- tax_glom(ps1.EcN, "Genus")
ps1.EcN.g.r <- microbiome::transform(ps1.EcN.g, "compositional")

ps1.EcN.g.r <- subset_samples(ps1.EcN.g.r, Origin %in% c("faeces"))

ps1.EcN.g.r <- filter_taxa(ps1.EcN.g.r, function(x) sum(x > 0.001) > (0.30*length(x)), TRUE)

ps1.EcN.g.r # 71 genera passed through the prevalence filter
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 71 taxa and 286 samples ]
## sample_data() Sample Data:       [ 286 samples by 171 sample variables ]
## tax_table()   Taxonomy Table:    [ 71 taxa by 6 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 71 tips and 70 internal nodes ]
# tax table with OTU column and best_hit
EcN.tax <- as.data.frame(tax_table(ps1.EcN.g.r))
EcN.tax$OTU <- rownames(EcN.tax)
tax_table(ps1.EcN.g.r) <- tax_table(as.matrix(EcN.tax))
ps1.EcN.g.bh <- format_to_besthit(ps1.EcN.g.r)
EcN.tax.bh <- as.data.frame(tax_table(ps1.EcN.g.bh))
colnames(EcN.tax.bh)[7] <- "OTU"

8.2. Prepare data

EcN.g.met <- meta(ps1.EcN.g.r)

EcN.g1 <- EcN.g.met[,c("Description","Ear_tag", "EcN_NGS_relative", "department_pre_weaning",
               "Treatment","Day_of_study_factor")]
# OTU table as dataframe:
EcN.g2 <- as.data.frame(t(otu_table(ps1.EcN.g.r)))
EcN.g2$Description <- rownames(EcN.g2)
# merge files
EcN.g3 <- merge(EcN.g1, EcN.g2, by = "Description")
colnames(EcN.g3)[7:ncol(EcN.g3)] <- paste("k__", sep = "", colnames(EcN.g3)[7:ncol(EcN.g3)])

8.3. Abundance testing

Abundance testing on all faecal samples

# Results, warnings and messages have been inactivated for this markdown, as otherwise it will use too much space in the HTML output file.
tc.treatment <- taxa.compare(taxtab = EcN.g3, propmed.rel = "gamlss",
                        transform = "none", comvar = "Treatment",
                        adjustvar = c("Day_of_study_factor", "department_pre_weaning"),
                        personid = "Ear_tag", longitudinal = "yes",
                        p.adjust.method = "fdr")

tc.EcN05 <- subset(tc.treatment, pval.adjust.TreatmentEcN < .05)

write.csv(tc.treatment, file = "./output_data/tc.treatment_all_samples_genera_department_filter0.001_prev0.3.csv") 

tc.EcN05$id <- droplevels(as.factor(tc.EcN05$id))

EcN3.m <- reshape2::melt(EcN.g3)
EcN3.max <- ddply(EcN3.m, .(variable), summarise, max = max(value))
EcN3.max1 <- EcN3.max
EcN3.max1$variable <- droplevels(EcN3.max1$variable)
 
# subset
EcN.g4 <- subset(EcN3.m, variable %in% tc.EcN05$id &
                variable %in% EcN3.max1$variable)

# create vector of genus names (OTU codes) to include in plots
select.gen.EcN <- levels(droplevels(EcN.g4$variable)) 
select.gen.EcN <- sub("k__", "", select.gen.EcN) # remove "k__"
select.gen.EcN

8.4. Editing output

tc.EcN.sub <- subset(tc.EcN05, id %in% EcN3.max1$variable)

# remove "k__" in id names
tc.EcN.sub$id <- as.factor(sub("k__", "", tc.EcN.sub$id))

# show tax names of output genera
EcN.g4.tax <- subset(EcN.tax.bh, OTU %in% select.gen.EcN) #significant genera

# create vectors of significant genera from comparisons
select.gen.EcN2 <- unique(c(as.character(select.gen.EcN)))

8.5. Prepare data for plotting

# filter for genera from first output series of GAMLSS tests (select.gen.EcN2).

# object 1: metadata
EcN.ls.met <- meta(ps1.EcN.g.r)

EcN.ls1 <- EcN.ls.met[,c("Description","Ear_tag", "EcN_NGS_relative", "department_pre_weaning", "Treatment","Day_of_study_factor")]

# object 2: OTU table
EcN.ls2 <- as.data.frame(t(otu_table(ps1.EcN.g.r)))
EcN.ls2$Description <- rownames(EcN.ls2)
# melt EcN.ls2
EcN.ls2.m <- reshape2::melt(EcN.ls2)
## Using Description as id variables
colnames(EcN.ls2.m) <- c("Description", "OTU", "abund")
# merge EcN.ls2, EcN.tax and EcN.ls1
EcN.ls3a <- base::merge(EcN.ls2.m, EcN.tax, by = "OTU")
EcN.ls3 <- base::merge(EcN.ls1, EcN.ls3a, by = "Description")

# subset to signif genera
EcN.ls3.max <- ddply(EcN.ls3, .(OTU), summarise, max = max(abund))
EcN.ls3.max$OTU <- droplevels(EcN.ls3.max$OTU)
EcN.ls3.s <- subset(EcN.ls3, OTU %in% select.gen.EcN2 & OTU %in%
                     EcN.ls3.max$OTU)
EcN.ls3.s$OTU <- droplevels(EcN.ls3.s$OTU)

8.6. Plot presets

theme4 <- theme_classic() + 
  theme(panel.grid.major = element_line(colour = "grey80"),
        panel.spacing = unit(.5,"lines"),
        panel.border = element_rect(color = "black", fill = NA, size = .5),
        strip.background = element_blank(),
        strip.placement = "outside",
        text = element_text(size=15),
        axis.text.x = element_text(hjust = .5, vjust = .5),
        plot.title = element_text(size = 12))
labs_abd <- as_labeller(c(
  "faeces" = "Faeces", "ileum_digesta" = "Ileum digesta", "jejunum_digesta" = " Jejunum digesta"))

8.7. Plotting loop

# for-loop
sig.plot <- list()
for(i in 1:nlevels(EcN.ls3.s$OTU)){
  a = levels(EcN.ls3.s$OTU)[i]
  B = EcN.ls3.s[EcN.ls3.s$OTU == a,]
  p = ggplot(B, aes(x = Day_of_study_factor, y = abund, color = Treatment)) +
    geom_boxplot(outlier.size = 0) +
    geom_point(aes(size = sqrt(EcN_NGS_relative), alpha = sqrt(EcN_NGS_relative)), position = position_jitterdodge(seed = 456)) +
    geom_point(size = 1, aes(fill = Treatment), colour= 'black', position = position_jitterdodge(seed = 456)) +
    labs(x="time (d)", y="rel. abundance") + scale_y_log10() +
    ggtitle(paste(B$Order,B$Family,B$Genus,B$OTU)) + theme4
  sig.plot[[i]] = p
}

# print plots
pdf("./figures/Plots_genus_sig_trim_comp_treatm_fcs_all_samp_padj.05_departm_filt0.001_0.3_adj_y_axis_alpha_by_sqrt_ecn_rela_log10_y_axis_size_by_sqrt_ecn_rela_h8_w12.pdf", height = 8, width = 12)
for (i in 1:nlevels(EcN.ls3.s$OTU)) {
    print(sig.plot[[i]])
}
## Warning: Transformation introduced infinite values in continuous y-axis

## Warning: Transformation introduced infinite values in continuous y-axis

## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 177 rows containing non-finite values (stat_boxplot).
## Warning: Transformation introduced infinite values in continuous y-axis

## Warning: Transformation introduced infinite values in continuous y-axis

## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 164 rows containing non-finite values (stat_boxplot).
## Warning: Transformation introduced infinite values in continuous y-axis

## Warning: Transformation introduced infinite values in continuous y-axis

## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 107 rows containing non-finite values (stat_boxplot).
## Warning: Transformation introduced infinite values in continuous y-axis

## Warning: Transformation introduced infinite values in continuous y-axis

## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 194 rows containing non-finite values (stat_boxplot).
## Warning: Transformation introduced infinite values in continuous y-axis

## Warning: Transformation introduced infinite values in continuous y-axis

## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 184 rows containing non-finite values (stat_boxplot).
## Warning: Transformation introduced infinite values in continuous y-axis

## Warning: Transformation introduced infinite values in continuous y-axis

## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 193 rows containing non-finite values (stat_boxplot).
dev.off() #Resulting figures can be found under Figure 4 of the manuscript
## png 
##   2

9. Heatmap - Proteobacteria ASVs

ps1 <- readRDS("./phyobjects/ps1.EcN.rds")

taxa_names(physeq = ps1) <- interaction(rownames(ps1@tax_table),ps1@tax_table[,"Genus"],ps1@tax_table[,"Family"])

ps1 <- microbiome::transform(ps1, "compositional")

ps1 <- subset_taxa(ps1, Phylum == "Proteobacteria")

ps1@sam_data$Day_of_study_factor <- factor(ps1@sam_data$Day_of_study_factor, levels = c("Day_4", "Day_8", "Day_14", "Day_26", "Day_27", "Day_35", "Day_43", "Day_44", "Day_59", "Day_69", "Day_70"))
ps1@sam_data$Pre_or_post_weaning <- factor(ps1@sam_data$Pre_or_post_weaning, levels = c("Pre_weaning", "Post_weaning"))

ps1 <- subset_samples(ps1, Origin %in% c("faeces"))
ps1 <- subset_samples(ps1, Pre_or_post_weaning %in% c("Pre_weaning"))

metadata <- ps1@sam_data
treatment1 <- rownames(metadata[metadata$Treatment != "Control"])
treatment2 <- rownames(metadata[metadata$Treatment == "Control"])

otu <- as.data.frame(ps1@otu_table)
taxa_group1 <- otu[,treatment1]
taxa_group2 <- otu[,treatment2]

significant <- list()
for (i in rownames(taxa_group1)){
  x = wilcox.test(x= as.numeric(taxa_group1[i,]),y = as.numeric(taxa_group2[i,]),paired = F,exact = F)
  significant[[i]] <- x$p.value  
} 

my_taxa <- do.call(rbind.data.frame, significant)
rownames(my_taxa) <-  rownames(taxa_group1)
my_taxa[,2] <- 1
my_taxa[,3] <- "whatev"

colnames(my_taxa) <- c("pval","corrected","taxon")
my_taxa$taxon <- rownames(my_taxa)

my_taxa$corrected <- p.adjust(my_taxa[,1], method = "BH")

significantly_different_taxa <- my_taxa[which(my_taxa$corrected < 1),]
significantly_different_taxa2 <- rownames(significantly_different_taxa)

ps1.significantly_different_taxa <- ps1
ps1.significantly_different_taxa@otu_table <- ps1@otu_table[significantly_different_taxa2,]
ps1.significantly_different_taxa@tax_table <- ps1@tax_table[significantly_different_taxa2,]

p <- plot_heatmap(ps1.significantly_different_taxa, method = "PCA", sample.order = "Ear_tag", title = "Heatmap_of_pre_weaning_proteobacteria", taxa.order = "Genus", sample.label="Ear_tag", low="#FFFFCC", high="#000033", na.value="white") + facet_grid(~Day_of_study_factor + Treatment, scales = "free") 

p
## Warning: Transformation introduced infinite values in discrete y-axis

pdf(file = "./figures/Heatmap_proteobacteria_otus_control_vs_ecn_no_testing_OTUgenusFamily_label_xlabel_eartag_order_eartag_treatment_grid.pdf", height = 15, width = 20)
p
## Warning: Transformation introduced infinite values in discrete y-axis
dev.off() #Resulting figure can be found under Supplementary Figure S9 of the manuscript
## png 
##   2

10. EcN relative abundance

10.1 Import data

pseq_not_annotated1 <- read_phyloseq(otu.file = "./input_data/Galaxy76-[NG-Tax__porcine_study_all_samples_eightrun_length120bp_no_annotation_0.005threshold_5may2020].biom1", taxonomy.file = TRUE, metadata.file = "./input_data/Metadata_porcine_study_june_2020.csv", "biom")
## Time to complete depends on OTU file size
## Warning in read_biom2phyloseq(biom.file = otu.file, taxonomy.file, metadata.file): Taxonomy is available both in the biom file and in 
##         the separate taxonomy.file. Using the biom file version here. 
##         To change this original taxonomy, do it explicitly in your code 
##         by modifying tax_table(physeq).
pseq_not_annotated1 <- prune_taxa(taxa_sums(pseq_not_annotated1) > 0, pseq_not_annotated1)
pseq_not_annotated1 # 110975 taxa in 489 samples
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 110975 taxa and 489 samples ]
## sample_data() Sample Data:       [ 489 samples by 171 sample variables ]
## tax_table()   Taxonomy Table:    [ 110975 taxa by 6 taxonomic ranks ]
saveRDS(pseq_not_annotated1, "./phyobjects/pseq_not_annotated1.rds")

# convert to relative abundance
pseq_not_annotated1 <- microbiome::transform(pseq_not_annotated1, "compositional")

tax_table(pseq_not_annotated1) <- cbind(tax_table(pseq_not_annotated1), 
                            rownames(tax_table(pseq_not_annotated1)))

colnames(tax_table(pseq_not_annotated1)) <- 
  c("Domain", "Phylum", "Class", "Order", "Family", "Genus", "OTUID")


colnames(tax_table(pseq_not_annotated1))
## [1] "Domain" "Phylum" "Class"  "Order"  "Family" "Genus"  "OTUID"
ps1.1OTU <- subset_taxa(pseq_not_annotated1, OTUID == "2671525405")
ps1.1OTU
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 1 taxa and 489 samples ]
## sample_data() Sample Data:       [ 489 samples by 171 sample variables ]
## tax_table()   Taxonomy Table:    [ 1 taxa by 7 taxonomic ranks ]
saveRDS(ps1.1OTU, "./phyobjects/ps1.1OTU.rds")

otu <- as.data.frame(ps1.1OTU@otu_table)
samples2 <- otu
samples3 <- t(samples2)

write.csv(samples3, file = "./EcN_abundance_NGS_dataset/ecn_otu_table_relative2.csv", fileEncoding = "UTF-16LE")

10.2. Barplot - Spiked samples

pseq_not_annotated1 <- readRDS("./phyobjects/pseq_not_annotated1.rds")

# select spiked samples
ps1 <- subset_samples(pseq_not_annotated1, Description %in% c("spk3", "spk4", "spk5", "spk6", "spk7")) 

# convert to relative abundance
ps1 <- microbiome::transform(ps1, "compositional")

#remove taxa that are not present in the spiked samples
ps1 <- prune_taxa(taxa_sums(ps1) > 0, ps1)
print(ps1)
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 3821 taxa and 5 samples ]
## sample_data() Sample Data:       [ 5 samples by 171 sample variables ]
## tax_table()   Taxonomy Table:    [ 3821 taxa by 6 taxonomic ranks ]
ps1 <- filter_taxa(ps1, function(x) sum(x > 0.0125) > (0.005*length(x)), TRUE)

tax_table(ps1) <- cbind(tax_table(ps1), 
                            rownames(tax_table(ps1)))

colnames(tax_table(ps1)) <- 
  c("Domain", "Phylum", "Class", "Order", "Family", "Genus", "OTUID")

getPalette = colorRampPalette(brewer.pal(10, "Paired"))

#barplot with single OTU
p <- plot_bar(ps1, fill = "OTUID")
p <- p + facet_wrap(~Day_of_study_factor, scales = "free_x", nrow = 1)
p <- p + theme(text = element_text(size = 20), panel.background = element_rect(fill = "white",
                                colour = "lightgrey",
                                size = 0.5, linetype = "solid"),)
p <- p + scale_fill_manual(values = getPalette(10))
p

pdf(file = "./figures/Barplot_EcN_presence_in_spiked_samples.pdf", height = 10, width = 10)
p
dev.off() #Resulting figure can be found under Supplementary Figure S2 of the manuscript
## png 
##   2

10.3. Barplot - EcN in feaces

ps1.1OTU <- readRDS("./phyobjects/ps1.1OTU.rds")

ps1.1OTU@sam_data$Day_of_study_factor <- factor(ps1.1OTU@sam_data$Day_of_study_factor, levels = c("Day_4", "Day_8", "Day_14", "Day_26", "Day_27", "Day_35", "Day_43", "Day_44", "Day_59", "Day_69", "Day_70"))

ps1.1OTU <- subset_samples(ps1.1OTU, Unique %in% c("yes"))
ps1.1OTU <- subset_samples(ps1.1OTU, Origin %in% c("faeces"))
#subset to exclusively contain pre-weaning faecal samples, as post-weaning faecal samples do not contain the EcN-specific ASV:
ps1.1OTU <- subset_samples(ps1.1OTU, Day_of_study_factor %in% c("Day_4", "Day_8", "Day_14", "Day_26"))
ps1.1OTU <- subset_samples(ps1.1OTU, Treatment %in% c("EcN"))

getPalette = colorRampPalette(brewer.pal(24, "Paired"))
## Warning in brewer.pal(24, "Paired"): n too large, allowed maximum for palette Paired is 12
## Returning the palette you asked for with that many colors
#barplot with single OTU
p <- plot_bar(ps1.1OTU, x="Ear_tag", fill = "Ear_tag")
p <- p + facet_wrap(~Day_of_study_factor, scales = "free_x", nrow = 1)
p <- p + theme(text = element_text(size = 20), panel.background = element_rect(fill = "white",
                                colour = "lightgrey",
                                size = 0.5, linetype = "solid"),)
p <- p + scale_fill_manual(values = getPalette(24))
p

pdf(file = "./figures/Barplot_EcN_presence_over_time_in_faeces_pre-weaning_ear_tag.pdf", height = 15, width = 25)
p
dev.off() #Resulting figure can be found under Figure 2 of the manuscript
## png 
##   2

10.4. Barplot - EcN in digesta

ps1.1OTU <- readRDS("./phyobjects/ps1.1OTU.rds")

ps1.1OTU@sam_data$Origin_Day <- factor(ps1.1OTU@sam_data$Origin_Day, levels = c("faeces_Day_4", "faeces_Day_8", "faeces_Day_14", "faeces_Day_26", "faeces_Day_35", "faeces_Day_43", "faeces_Day_59", "faeces_Day_69", "jejunum_digesta_Day_27", "ileum_digesta_Day_27", "caecum_digesta_Day_27", "jejunum_digesta_Day_44", "ileum_digesta_Day_44", "caecum_digesta_Day_44", "jejunum_digesta_Day_70", "ileum_digesta_Day_70", "caecum_digesta_Day_70")) 

ps1.1OTU <- subset_samples(ps1.1OTU, Unique %in% c("yes"))
ps1.1OTU <- subset_samples(ps1.1OTU, Origin %in% c("jejunum_digesta", "ileum_digesta", "caecum_digesta"))

#subset to exclusively contain pre-weaning faecal samples, as post-weaning faecal samples do not contain the EcN-specific ASV:
ps1.1OTU <- subset_samples(ps1.1OTU, Treatment %in% c("EcN"))

getPalette = colorRampPalette(brewer.pal(24, "Paired"))
## Warning in brewer.pal(24, "Paired"): n too large, allowed maximum for palette Paired is 12
## Returning the palette you asked for with that many colors
#barplot with single OTU
p <- plot_bar(ps1.1OTU, x="Ear_tag", fill = "Origin_Day")
p <- p + facet_wrap(~Origin_Day, scales = "free_x", nrow = 1)
p <- p + theme(text = element_text(size = 20), panel.background = element_rect(fill = "white",
                                colour = "lightgrey",
                                size = 0.5, linetype = "solid"),)
p <- p + scale_fill_manual(values = getPalette(24))
p

pdf(file = "./figures/Barplot_EcN_presence_over_time_in_digesta_ear_tag.pdf", height = 15, width = 25)
p
dev.off() #Resulting figure can be found under Supplementary Figure S6 of the manuscript
## png 
##   2

10.5. Barplot - Treponema presence

ps1.1genus <- readRDS("./phyobjects/ps1.EcN.rds")

# convert to relative abundance
ps1.1genus <- microbiome::transform(ps1.1genus, "compositional")

ps1.1genus@sam_data$Day_of_study_factor <- factor(ps1.1genus@sam_data$Day_of_study_factor, levels = c("Day_4", "Day_8", "Day_14", "Day_26", "Day_27", "Day_35", "Day_43", "Day_44", "Day_59", "Day_69", "Day_70"))

colnames(tax_table(ps1.1genus))
## [1] "Domain" "Phylum" "Class"  "Order"  "Family" "Genus"
tax_table(ps1.1genus) <- cbind(tax_table(ps1.1genus), 
                            rownames(tax_table(ps1.1genus)))

colnames(tax_table(ps1.1genus)) <- 
  c("Domain", "Phylum", "Class", "Order", "Family", "Genus", "OTUID")

ps1.1genus <- subset_taxa(ps1.1genus, Genus == "Treponema_2")
ps1.1genus <- subset_samples(ps1.1genus, Unique %in% c("yes"))
ps1.1genus <- subset_samples(ps1.1genus, Origin %in% c("faeces"))
ps1.1genus <- prune_taxa(taxa_sums(otu_table(ps1.1genus))>0, ps1.1genus)

getPalette = colorRampPalette(brewer.pal(ntaxa(ps1.1genus), "Paired"))
## Warning in brewer.pal(ntaxa(ps1.1genus), "Paired"): n too large, allowed maximum for palette Paired is 12
## Returning the palette you asked for with that many colors
#barplot with single OTU
p <- plot_bar(ps1.1genus, x="Ear_tag", fill = "OTU")
p <- p + facet_wrap(~Day_of_study_factor + Origin + Treatment, scales = "free_x", nrow = 1)
p <- p + theme(text = element_text(size = 20), panel.background = element_rect(fill = "white",
                                colour = "lightgrey",
                                size = 0.5, linetype = "solid"),)
p <- p + scale_fill_manual(values = getPalette(ntaxa(ps1.1genus)))
p

pdf(file = "./figures/Barplot_Treponema_2_presence_over_time_in_faeces_fill_OTU.pdf", height = 15, width = 25)
p
dev.off() #Resulting figure can be found under Supplementary Figure S13 of the manuscript
## png 
##   2

10.6. Barplot - Treponema suis

ps1.1genus <- readRDS("./phyobjects/ps1.EcN.rds")

ps1.1genus@sam_data$Day_of_study_factor <- factor(ps1.1genus@sam_data$Day_of_study_factor, levels = c("Day_4", "Day_8", "Day_14", "Day_26", "Day_27", "Day_35", "Day_43", "Day_44", "Day_59", "Day_69", "Day_70"))

colnames(tax_table(ps1.1genus))
## [1] "Domain" "Phylum" "Class"  "Order"  "Family" "Genus"
tax_table(ps1.1genus) <- cbind(tax_table(ps1.1genus), 
                            rownames(tax_table(ps1.1genus)))

colnames(tax_table(ps1.1genus)) <- 
  c("Domain", "Phylum", "Class", "Order", "Family", "Genus", "OTUID")

# convert to relative abundance
ps1.1genus <- microbiome::transform(ps1.1genus, "compositional")

# Select a single species within the Treponema genus:
#ps1.1genus <- subset_taxa(ps1.1genus, OTUID == "8138091913") #Treponema Succinifaciens
#ps1.1genus <- subset_taxa(ps1.1genus, OTUID == "8138091950") #Treponema porcinum
#ps1.1genus <- subset_taxa(ps1.1genus, OTUID == "8138091954") #Treponema parvum
#ps1.1genus <- subset_taxa(ps1.1genus, OTUID == "8138092035") #Treponema berlinense
ps1.1genus <- subset_taxa(ps1.1genus, OTUID == "8138091173" | OTUID == "813809349") #Candidatus Treponema suis

ps1.1genus <- subset_samples(ps1.1genus, Unique %in% c("yes"))
ps1.1genus <- subset_samples(ps1.1genus, Origin %in% c("faeces")) #, "caecum_digesta"))

ps1.1genus <- prune_taxa(taxa_sums(otu_table(ps1.1genus))>0, ps1.1genus)

getPalette = colorRampPalette(brewer.pal(ntaxa(ps1.1genus), "Paired"))
## Warning in brewer.pal(ntaxa(ps1.1genus), "Paired"): minimal value for n is 3, returning requested palette with 3 different levels
#barplot with single OTU
p <- plot_bar(ps1.1genus, x="Ear_tag", fill = "OTU")
p <- p + facet_wrap(~Day_of_study_factor + Origin + Treatment, scales = "free_x", nrow = 1)
p <- p + theme(text = element_text(size = 20), panel.background = element_rect(fill = "white",
                                colour = "lightgrey",
                                size = 0.5, linetype = "solid"),)
p <- p + scale_fill_manual(values = getPalette(ntaxa(ps1.1genus)))
p

pdf(file = "./figures/Barplot_candidatus_treponema_suis1_and_2_singleOTU_presence_over_time_fill_OTU_faeces.pdf", height = 15, width = 25)
p
dev.off() #Resulting figure can be found under Supplementary Figure S13 of the manuscript
## png 
##   2

10.7. Barplot - Holdemanella

ps1.1genus <- readRDS("./phyobjects/ps1.EcN.rds")

ps1.1genus@sam_data$Day_of_study_factor <- factor(ps1.1genus@sam_data$Day_of_study_factor, levels = c("Day_4", "Day_8", "Day_14", "Day_26", "Day_27", "Day_35", "Day_43", "Day_44", "Day_59", "Day_69", "Day_70"))

# convert to relative abundance
ps1.1genus <- microbiome::transform(ps1.1genus, "compositional")

ps1.1genus <- subset_taxa(ps1.1genus, Genus == "Holdemanella")
ps1.1genus <- subset_samples(ps1.1genus, Unique %in% c("yes"))
ps1.1genus <- subset_samples(ps1.1genus, Origin %in% c("faeces"))

ps1.1genus <- subset_samples(ps1.1genus, Pre_or_post_weaning %in% c("Pre_weaning"))

ps1.1genus <- prune_taxa(taxa_sums(otu_table(ps1.1genus))>0, ps1.1genus)

getPalette = colorRampPalette(brewer.pal(ntaxa(ps1.1genus), "Paired"))

#barplot with single OTU
p <- plot_bar(ps1.1genus, x="Ear_tag", fill = "OTU")
p <- p + facet_wrap(~Day_of_study_factor + Origin + Treatment, scales = "free_x", nrow = 1)
p <- p + theme(text = element_text(size = 20), panel.background = element_rect(fill = "white",
                                colour = "lightgrey",
                                size = 0.5, linetype = "solid"),)
p <- p + scale_fill_manual(values = getPalette(ntaxa(ps1.1genus)))
p

pdf(file = "./figures/Barplot_Holdemanella_presence_over_time_in_faeces_pre-w_fill_OTU.pdf", height = 15, width = 25)
p
dev.off() #This figure was not used in the manuscript, but does contain interesting data.
## png 
##   2

10.8. Barplot - Ruminococcaceae_UCG−014

ps1.1genus <- readRDS("./phyobjects/ps1.EcN.rds")

ps1.1genus@sam_data$Day_of_study_factor <- factor(ps1.1genus@sam_data$Day_of_study_factor, levels = c("Day_4", "Day_8", "Day_14", "Day_26", "Day_27", "Day_35", "Day_43", "Day_44", "Day_59", "Day_69", "Day_70"))

# convert to relative abundance
ps1.1genus <- microbiome::transform(ps1.1genus, "compositional")

ps1.1genus <- subset_taxa(ps1.1genus, Genus == "Ruminococcaceae_UCG−014")
ps1.1genus <- subset_samples(ps1.1genus, Unique %in% c("yes"))
ps1.1genus <- subset_samples(ps1.1genus, Origin %in% c("faeces"))
ps1.1genus <- prune_taxa(taxa_sums(otu_table(ps1.1genus))>0, ps1.1genus)

getPalette = colorRampPalette(brewer.pal(ntaxa(ps1.1genus), "Paired"))
## Warning in brewer.pal(ntaxa(ps1.1genus), "Paired"): n too large, allowed maximum for palette Paired is 12
## Returning the palette you asked for with that many colors
#barplot with single OTU
p <- plot_bar(ps1.1genus, x="Ear_tag", fill = "OTU")
p <- p + facet_wrap(~Day_of_study_factor + Origin + Treatment, scales = "free_x", nrow = 1)
p <- p + theme(text = element_text(size = 20), panel.background = element_rect(fill = "white",
                                colour = "lightgrey",
                                size = 0.5, linetype = "solid"),)
p <- p + scale_fill_manual(values = getPalette(ntaxa(ps1.1genus)))
p

pdf(file = "./figures/Barplot_Ruminococcaceae_UCG−014_presence_over_time_in_faeces_fill_OTU.pdf", height = 15, width = 25)
p
dev.off() #This figure was not used in the manuscript, but does contain interesting data.
## png 
##   2

10.9. Barplot - Terrisporobacter

ps1.1genus <- readRDS("./phyobjects/ps1.EcN.rds")

ps1.1genus@sam_data$Day_of_study_factor <- factor(ps1.1genus@sam_data$Day_of_study_factor, levels = c("Day_4", "Day_8", "Day_14", "Day_26", "Day_27", "Day_35", "Day_43", "Day_44", "Day_59", "Day_69", "Day_70"))

# convert to relative abundance
ps1.1genus <- microbiome::transform(ps1.1genus, "compositional")

ps1.1genus <- subset_taxa(ps1.1genus, Genus == "Terrisporobacter")
ps1.1genus <- subset_samples(ps1.1genus, Unique %in% c("yes"))
ps1.1genus <- subset_samples(ps1.1genus, Origin %in% c("faeces"))

ps1.1genus <- subset_samples(ps1.1genus, Pre_or_post_weaning %in% c("Pre_weaning"))
ps1.1genus <- prune_taxa(taxa_sums(otu_table(ps1.1genus))>0, ps1.1genus)

getPalette = colorRampPalette(brewer.pal(ntaxa(ps1.1genus), "Paired"))
## Warning in brewer.pal(ntaxa(ps1.1genus), "Paired"): minimal value for n is 3, returning requested palette with 3 different levels
#barplot with single OTU
p <- plot_bar(ps1.1genus, x="Ear_tag", fill = "OTU")
p <- p + facet_wrap(~Day_of_study_factor + Origin + Treatment, scales = "free_x", nrow = 1)
p <- p + theme(text = element_text(size = 20), panel.background = element_rect(fill = "white",
                                colour = "lightgrey",
                                size = 0.5, linetype = "solid"),)
p <- p + scale_fill_manual(values = getPalette(ntaxa(ps1.1genus)))
p

pdf(file = "./figures/Barplot_Terrisporobacter_presence_over_time_in_faeces_pre-w_fill_OTU.pdf", height = 15, width = 25)
p
dev.off() #This figure was not used in the manuscript, but does contain interesting data.
## png 
##   2

11. NGS vs qPCR data

Compare EcN abundance in NGS vs qPCR data

ps1 <- readRDS("./phyobjects/ps1.EcN.rds")

ps1.tested <- ps1 <- subset_samples(ps1, fcs_tested_qpcr %in% c("yes"))
ps1.tested
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 3022 taxa and 35 samples ]
## sample_data() Sample Data:       [ 35 samples by 171 sample variables ]
## tax_table()   Taxonomy Table:    [ 3022 taxa by 6 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 3022 tips and 3021 internal nodes ]
ps1.tested.df <- as.data.frame(sample_data(ps1.tested))

p <- ggplot(ps1.tested.df, 
       mapping = aes(rel_abund_ecn_ct33, EcN_NGS_relative)) +
  geom_point(size = 3, alpha = 0.9, colour = 'darkgreen') + scale_x_log10() + scale_y_log10() +
  geom_smooth(method='lm', se = FALSE)
p
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 12 rows containing non-finite values (stat_smooth).

pdf(file = "./figures/Compare_EcN_abund_NGS_vs_qPCR_ct33.pdf", height = 10, width = 15)
p
## Warning: Transformation introduced infinite values in continuous x-axis

## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 12 rows containing non-finite values (stat_smooth).
dev.off()
## png 
##   2
#Calculate r-squared:

x <- c(ps1.tested.df$rel_abund_ecn_ct33)
y <- c(ps1.tested.df$EcN_NGS_relative)
ps1.tested.df.calc_r2_merged <- data.frame(x, y)

summary(lm(x ~ y, data=ps1.tested.df.calc_r2_merged))$r.squared 
## [1] 0.837213

12. Volcano plot with gene expression data

12.1. ileum data day 27

res <- read.table("./input_data/ileum_data_volcano_plot_for_R_d27_ecn_padj.csv",
                      header = TRUE,
                      sep = ",",
                      stringsAsFactors = FALSE)

head(res)
##     Gene log2FoldChange    pvalue padj
## 1    C3      0.08798021 0.6818075    1
## 2    C5      0.16858111 0.7429596    1
## 3 DEFB1     -0.22286587 0.6403522    1
## 4 GAPDH     -0.03078863 0.7210233    1
## 5  IL1B     -0.89565748 0.3587843    1
## 6   IL8     -0.17655240 0.5781095    1
colnames(res) <- c("Gene","log2FoldChange","pvalue", "padj")

res$padj <- rownames(res)

res$padj <- p.adjust(res[,3], method = "fdr")

# Make a basic volcano plot
with(res, plot(log2FoldChange, -log10(pvalue), pch=20, main="Volcano plot", xlim=c(-3.2,2.8), ylim=c(0, 2.4)))

# Add colored points: red if padj<0.05, orange of log2FC>1, green if both)
with(subset(res, padj<.05 ), points(log2FoldChange, -log10(pvalue), pch=20, col="red"))
with(subset(res, abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col="orange"))
with(subset(res, padj<.05 & abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col="green"))

# Label points with the textxy function from the calibrate plot
library(calibrate)
## Warning: package 'calibrate' was built under R version 3.6.3
## 
## Attaching package: 'calibrate'
## The following objects are masked from 'package:vegan':
## 
##     calibrate, rda
with(subset(res, padj<.5 & abs(log2FoldChange)>0.1), textxy(log2FoldChange, -log10(pvalue), labs=Gene, cex=.8))

# The pdf figure should be used to really look at the data, as it has better visibility
pdf(file = "./figures/ileum_data_volcano_plot_for_R_d27_ecn_padj_xy_limits.pdf", height = 10, width = 15)
# Make a basic volcano plot
with(res, plot(log2FoldChange, -log10(pvalue), pch=20, main="Volcano plot", xlim=c(-3.2,2.8), ylim=c(0, 2.4)))

# Add colored points: red if padj<0.05, orange of log2FC>1, green if both)
with(subset(res, padj<.05 ), points(log2FoldChange, -log10(pvalue), pch=20, col="red"))
with(subset(res, abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col="orange"))
with(subset(res, padj<.05 & abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col="green"))

# Label points with the textxy function from the calibrate plot
library(calibrate)

with(subset(res, pvalue<1 & abs(log2FoldChange)>1), textxy(log2FoldChange, -log10(pvalue), labs=Gene, cex=.8))
dev.off()
## png 
##   2

12.2. ileum data day 44

res <- read.table("./input_data/ileum_data_volcano_plot_for_R_d44_ecn_padj.csv",
                      header = TRUE,
                      sep = ",",
                      stringsAsFactors = FALSE)

head(res)
##     Gene log2FoldChange    pvalue padj
## 1    C3     -0.15456308 0.5452612    1
## 2    C5     -0.28635917 0.4276373    1
## 3 DEFB1     -0.72634279 0.3188917    1
## 4 GAPDH     -0.01462537 0.8921610    1
## 5  IL1B      0.33275775 0.3512406    1
## 6   IL8      0.02795401 0.9142406    1
colnames(res) <- c("Gene","log2FoldChange","pvalue", "padj")

res$padj <- rownames(res)

res$padj <- p.adjust(res[,3], method = "fdr")

# Make a basic volcano plot
with(res, plot(log2FoldChange, -log10(pvalue), pch=20, main="Volcano plot", xlim=c(-3.2,2.8), ylim=c(0, 2.4)))

# Add colored points: red if padj<0.05, orange of log2FC>1, green if both)
with(subset(res, padj<.05 ), points(log2FoldChange, -log10(pvalue), pch=20, col="red"))
with(subset(res, abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col="orange"))
with(subset(res, padj<.05 & abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col="green"))

# Label points with the textxy function from the calibrate plot
library(calibrate)
with(subset(res, padj<.5 & abs(log2FoldChange)>0.1), textxy(log2FoldChange, -log10(pvalue), labs=Gene, cex=.8))

# The pdf figure should be used to really look at the data, as it has better visibility
pdf(file = "./figures/ileum_data_volcano_plot_for_R_d44_ecn_padj_xy_limits.pdf", height = 10, width = 15)
# Make a basic volcano plot
with(res, plot(log2FoldChange, -log10(pvalue), pch=20, main="Volcano plot", xlim=c(-3.2,2.8), ylim=c(0, 2.4)))

# Add colored points: red if padj<0.05, orange of log2FC>1, green if both)
with(subset(res, padj<.05 ), points(log2FoldChange, -log10(pvalue), pch=20, col="red"))
with(subset(res, abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col="orange"))
with(subset(res, padj<.05 & abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col="green"))

# Label points with the textxy function from the calibrate plot
library(calibrate)
with(subset(res, pvalue<1 & abs(log2FoldChange)>1), textxy(log2FoldChange, -log10(pvalue), labs=Gene, cex=.8))
dev.off()
## png 
##   2

12.3. ileum data day 27 vs 44

res <- read.table("./input_data/ileum_data_volcano_plot_for_R_d27_vs_d44_ecn_padj.csv",
                      header = TRUE,
                      sep = ",",
                      stringsAsFactors = FALSE)

head(res)
##     Gene log2FoldChange      pvalue padj
## 1    C3      0.46283920 0.009418346    1
## 2    C5      0.74245387 0.015504338    1
## 3 DEFB1     -1.34201104 0.002481060    1
## 4 GAPDH      0.09427848 0.164278001    1
## 5  IL1B     -0.09544802 0.849716378    1
## 6   IL8      0.79363040 0.000339299    1
colnames(res) <- c("Gene","log2FoldChange","pvalue", "padj")

res$padj <- rownames(res)

res$padj <- p.adjust(res[,3], method = "fdr")

# Make a basic volcano plot
with(res, plot(log2FoldChange, -log10(pvalue), pch=20, main="Volcano plot")) 

# Add colored points: red if padj<0.05, orange of log2FC>1, green if both)
with(subset(res, padj<.05 ), points(log2FoldChange, -log10(pvalue), pch=20, col="red"))
with(subset(res, abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col="orange"))
with(subset(res, padj<.05 & abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col="green"))

# Label points with the textxy function from the calibrate plot
library(calibrate)
with(subset(res, padj<.05 & abs(log2FoldChange)>0.1), textxy(log2FoldChange, -log10(pvalue), labs=Gene, cex=.8))

pdf(file = "./figures/ileum_data_volcano_plot_for_R_d27_vs_d44_ecn_padj_xy_limits.pdf", height = 10, width = 15)
# Make a basic volcano plot
with(res, plot(log2FoldChange, -log10(pvalue), pch=20, main="Volcano plot"))

# Add colored points: red if padj<0.05, orange of log2FC>1, green if both)
with(subset(res, padj<.05 ), points(log2FoldChange, -log10(pvalue), pch=20, col="red"))
with(subset(res, abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col="orange"))
with(subset(res, padj<.05 & abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col="green"))

# Label points with the textxy function from the calibrate plot
library(calibrate)
with(subset(res, padj<.05 & abs(log2FoldChange)>0.1), textxy(log2FoldChange, -log10(pvalue), labs=Gene, cex=.8))
dev.off()
## png 
##   2
# The pdf figure should be used to really look at the data, as it has better visibility
pdf(file = "./figures/ileum_data_volcano_plot_for_R_d27_vs_d44_ecn_padj_xy_limits_h20_w30.pdf", height = 20, width = 30)
# Make a basic volcano plot
with(res, plot(log2FoldChange, -log10(pvalue), pch=20, main="Volcano plot")) 

# Add colored points: red if padj<0.05, orange of log2FC>1, green if both)
with(subset(res, padj<.05 ), points(log2FoldChange, -log10(pvalue), pch=20, col="red"))
with(subset(res, abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col="orange"))
with(subset(res, padj<.05 & abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col="green"))

# Label points with the textxy function from the calibrate plot
library(calibrate)
with(subset(res, padj<.05 & abs(log2FoldChange)>0.1), textxy(log2FoldChange, -log10(pvalue), labs=Gene, cex=.8))
dev.off()
## png 
##   2

12.4. colon data day 27

res <- read.table("./input_data/colon_data_volcano_plot_for_R_d27_padj.csv",
                      header = TRUE,
                      sep = ",",
                      stringsAsFactors = FALSE)

head(res)
##     Gene log2FoldChange     pvalue padj
## 1    C3       0.3053966 0.33290755    1
## 2    C5       0.9739564 0.02975623    1
## 3  IL1B       0.7899645 0.59146375    1
## 4   IL8       0.1766245 0.78980545    1
## 5 IL12A       0.6721809 0.17577574    1
## 6  IFNA      -1.0590843 0.31255028    1
colnames(res) <- c("Gene","log2FoldChange","pvalue", "padj")

res$padj <- rownames(res)

res$padj <- p.adjust(res[,3], method = "fdr")

# Make a basic volcano plot
with(res, plot(log2FoldChange, -log10(pvalue), pch=20, main="Volcano plot", xlim=c(-3.2,2.8), ylim=c(0, 2.4)))

# Add colored points: red if padj<0.05, orange of log2FC>1, green if both)
with(subset(res, padj<.05 ), points(log2FoldChange, -log10(pvalue), pch=20, col="red"))
with(subset(res, abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col="orange"))
with(subset(res, padj<.05 & abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col="green"))

# Label points with the textxy function from the calibrate plot
library(calibrate)
with(subset(res, padj<.5 & abs(log2FoldChange)>0.1), textxy(log2FoldChange, -log10(pvalue), labs=Gene, cex=.8))

# The pdf figure should be used to really look at the data, as it has better visibility
pdf(file = "./figures/colon_data_volcano_plot_for_R_d27_padj_xy_limits.pdf", height = 10, width = 15)
# Make a basic volcano plot
with(res, plot(log2FoldChange, -log10(pvalue), pch=20, main="Volcano plot", xlim=c(-3.2,2.8), ylim=c(0, 2.4)))

# Add colored points: red if padj<0.05, orange of log2FC>1, green if both)
with(subset(res, padj<.05 ), points(log2FoldChange, -log10(pvalue), pch=20, col="red"))
with(subset(res, abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col="orange"))
with(subset(res, padj<.05 & abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col="green"))

# Label points with the textxy function from the calibrate plot
library(calibrate)
with(subset(res, pvalue<1 & abs(log2FoldChange)>1), textxy(log2FoldChange, -log10(pvalue), labs=Gene, cex=.8))
dev.off()
## png 
##   2

12.5. colon data day 44

res <- read.table("./input_data/colon_data_volcano_plot_for_R_d44_padj.csv",
                      header = TRUE,
                      sep = ",",
                      stringsAsFactors = FALSE)

head(res)
##     Gene log2FoldChange     pvalue padj
## 1    C3    -0.381683678 0.09031378    1
## 2    C5    -0.003133154 0.99435307    1
## 3  IL1B     0.045352502 0.95020697    1
## 4   IL8     0.465913302 0.31490536    1
## 5 IL12A     0.534419071 0.51276538    1
## 6  IFNA    -0.254140818 0.66350955    1
colnames(res) <- c("Gene","log2FoldChange","pvalue", "padj")

res$padj <- rownames(res)

res$padj <- p.adjust(res[,3], method = "fdr")

# Make a basic volcano plot
with(res, plot(log2FoldChange, -log10(pvalue), pch=20, main="Volcano plot", xlim=c(-3.2,2.8), ylim=c(0, 2.4)))

# Add colored points: red if padj<0.05, orange of log2FC>1, green if both)
with(subset(res, padj<.05 ), points(log2FoldChange, -log10(pvalue), pch=20, col="red"))
with(subset(res, abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col="orange"))
with(subset(res, padj<.05 & abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col="green"))

# Label points with the textxy function from the calibrate plot
library(calibrate)
with(subset(res, padj<.5 & abs(log2FoldChange)>0.1), textxy(log2FoldChange, -log10(pvalue), labs=Gene, cex=.8))

# The pdf figure should be used to really look at the data, as it has better visibility
pdf(file = "./figures/colon_data_volcano_plot_for_R_d44_padj_xy_limits.pdf", height = 10, width = 15)
# Make a basic volcano plot
with(res, plot(log2FoldChange, -log10(pvalue), pch=20, main="Volcano plot", xlim=c(-3.2,2.8), ylim=c(0, 2.4)))

# Add colored points: red if padj<0.05, orange of log2FC>1, green if both)
with(subset(res, padj<.05 ), points(log2FoldChange, -log10(pvalue), pch=20, col="red"))
with(subset(res, abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col="orange"))
with(subset(res, padj<.05 & abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col="green"))

# Label points with the textxy function from the calibrate plot
library(calibrate)
with(subset(res, pvalue<1 & abs(log2FoldChange)>1), textxy(log2FoldChange, -log10(pvalue), labs=Gene, cex=.8))
dev.off()
## png 
##   2

12.6. colon data day 27 vs 44

res <- read.table("./input_data/colon_data_volcano_plot_for_R_d27_vs_d44_padj.csv",
                      header = TRUE,
                      sep = ",",
                      stringsAsFactors = FALSE)

head(res)
##     Gene log2FoldChange      pvalue padj
## 1    C3      0.35658942 0.062154613    1
## 2    C5      0.75487339 0.024536378    1
## 3  IL1B     -0.11063755 0.888882580    1
## 4   IL8      1.18235754 0.004382547    1
## 5 IL12A      0.06043495 0.896050835    1
## 6  IFNA     -0.64101309 0.324858334    1
colnames(res) <- c("Gene","log2FoldChange","pvalue", "padj")

res$padj <- rownames(res)

res$padj <- p.adjust(res[,3], method = "fdr")

# Make a basic volcano plot
with(res, plot(log2FoldChange, -log10(pvalue), pch=20, main="Volcano plot")) 

# Add colored points: red if padj<0.05, orange of log2FC>1, green if both)
with(subset(res, padj<.05 ), points(log2FoldChange, -log10(pvalue), pch=20, col="red"))
with(subset(res, abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col="orange"))
with(subset(res, padj<.05 & abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col="green"))

# Label points with the textxy function from the calibrate plot
library(calibrate)
with(subset(res, padj<.05 & abs(log2FoldChange)>0.1), textxy(log2FoldChange, -log10(pvalue), labs=Gene, cex=.8))
with(subset(res, abs(log2FoldChange)>1), textxy(log2FoldChange, -log10(pvalue), labs=Gene, cex=.8))

pdf(file = "./figures/colon_data_volcano_plot_for_R_d27_vs_d44_padj_xy_limits.pdf", height = 10, width = 15)
# Make a basic volcano plot
with(res, plot(log2FoldChange, -log10(pvalue), pch=20, main="Volcano plot")) 

# Add colored points: red if padj<0.05, orange of log2FC>1, green if both)
with(subset(res, padj<.05 ), points(log2FoldChange, -log10(pvalue), pch=20, col="red"))
with(subset(res, abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col="orange"))
with(subset(res, padj<.05 & abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col="green"))

# Label points with the textxy function from the calibrate plot
library(calibrate)
with(subset(res, padj<.05 & abs(log2FoldChange)>0.1), textxy(log2FoldChange, -log10(pvalue), labs=Gene, cex=.8))
with(subset(res, abs(log2FoldChange)>1), textxy(log2FoldChange, -log10(pvalue), labs=Gene, cex=.8))
dev.off()
## png 
##   2
# The pdf figure should be used to really look at the data, as it has better visibility
pdf(file = "./figures/colon_data_volcano_plot_for_R_d27_vs_d44_padj_xy_limits_h20_w30.pdf", height = 20, width = 30)

with(res, plot(log2FoldChange, -log10(pvalue), pch=20, main="Volcano plot"))

# Add colored points: red if padj<0.05, orange of log2FC>1, green if both)
with(subset(res, padj<.05 ), points(log2FoldChange, -log10(pvalue), pch=20, col="red"))
with(subset(res, abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col="orange"))
with(subset(res, padj<.05 & abs(log2FoldChange)>1), points(log2FoldChange, -log10(pvalue), pch=20, col="green"))

# Label points with the textxy function from the calibrate plot
library(calibrate)
with(subset(res, padj<.05 & abs(log2FoldChange)>0.1), textxy(log2FoldChange, -log10(pvalue), labs=Gene, cex=.8))
with(subset(res, abs(log2FoldChange)>1), textxy(log2FoldChange, -log10(pvalue), labs=Gene, cex=.8))
dev.off()
## png 
##   2

13 Immunological analyses

# Here, the analysis of one of the immunological data files is showcased. All other immunological files that are present within the data repository were run with the same code, although with alternating parameters.
my_data <- read.table("./input_data/Flow_cytometry_NKT_PBMC_Memory_Ki67_EcN.csv",
                      header = TRUE,
                      sep = ",",
                      stringsAsFactors = FALSE)

boxplot(score ~ group*time,
        col=c("white","lightgray"),my_data)

fm1 <- lme(score ~ time + group + time * group, data = my_data, method = "REML", random = ~ 1 | department, na.action=na.omit)
anova(fm1)
##             numDF denDF   F-value p-value
## (Intercept)     1    52 2649.0293  <.0001
## time            3    52  114.8167  <.0001
## group           1    52    0.7161  0.4013
## time:group      3    52    2.8845  0.0444
lme.pre.pd1 <- lme(score ~ time + group + time * group,
                   data = my_data, method = "REML",
                   random = ~ 1 | department, na.action=na.omit)
summary(lme.pre.pd1)
## Linear mixed-effects model fit by REML
##  Data: my_data 
##        AIC      BIC    logLik
##   424.6632 444.7366 -202.3316
## 
## Random effects:
##  Formula: ~1 | department
##          (Intercept) Residual
## StdDev: 0.0002292705 8.246591
## 
## Fixed effects: score ~ time + group + time * group 
##                     Value Std.Error DF   t-value p-value
## (Intercept)      16.52500  2.915610 52  5.667767  0.0000
## timet2           52.51250  4.123296 52 12.735565  0.0000
## timet3           39.40000  4.123296 52  9.555463  0.0000
## timet4           57.30000  4.123296 52 13.896651  0.0000
## groupEcN          9.03214  4.268016 52  2.116239  0.0391
## timet2:groupEcN -14.19464  5.934436 52 -2.391911  0.0204
## timet3:groupEcN -12.60714  5.934436 52 -2.124405  0.0384
## timet4:groupEcN -15.64464  5.934436 52 -2.636248  0.0110
##  Correlation: 
##                 (Intr) timet2 timet3 timet4 grpEcN tm2:EN tm3:EN
## timet2          -0.707                                          
## timet3          -0.707  0.500                                   
## timet4          -0.707  0.500  0.500                            
## groupEcN        -0.683  0.483  0.483  0.483                     
## timet2:groupEcN  0.491 -0.695 -0.347 -0.347 -0.719              
## timet3:groupEcN  0.491 -0.347 -0.695 -0.347 -0.719  0.517       
## timet4:groupEcN  0.491 -0.347 -0.347 -0.695 -0.719  0.517  0.517
## 
## Standardized Within-Group Residuals:
##          Min           Q1          Med           Q3          Max 
## -2.002342497 -0.511250204  0.003031555  0.389121820  2.482844067 
## 
## Number of Observations: 63
## Number of Groups: 4
lme.pre.pd1
## Linear mixed-effects model fit by REML
##   Data: my_data 
##   Log-restricted-likelihood: -202.3316
##   Fixed: score ~ time + group + time * group 
##     (Intercept)          timet2          timet3          timet4        groupEcN 
##       16.525000       52.512500       39.400000       57.300000        9.032143 
## timet2:groupEcN timet3:groupEcN timet4:groupEcN 
##      -14.194643      -12.607143      -15.644643 
## 
## Random effects:
##  Formula: ~1 | department
##          (Intercept) Residual
## StdDev: 0.0002292705 8.246591
## 
## Number of Observations: 63
## Number of Groups: 4
anova(lme.pre.pd1)
##             numDF denDF   F-value p-value
## (Intercept)     1    52 2649.0293  <.0001
## time            3    52  114.8167  <.0001
## group           1    52    0.7161  0.4013
## time:group      3    52    2.8845  0.0444
# There is no need for temporal autocorrelation (p-value not significant)

lme.pre.pd2 <- lme(score ~ time + group + time * group,
                   data = my_data, method = "REML",
                   cor = corAR1(), random = ~ 1 | id | department, na.action=na.omit)
lme.pre.pd2
## Linear mixed-effects model fit by REML
##   Data: my_data 
##   Log-restricted-likelihood: -200.1735
##   Fixed: score ~ time + group + time * group 
##     (Intercept)          timet2          timet3          timet4        groupEcN 
##       16.487635       52.514678       39.008222       57.567593        8.729937 
## timet2:groupEcN timet3:groupEcN timet4:groupEcN 
##      -13.400865      -12.131159      -15.449448 
## 
## Random effects:
##  Formula: ~1 | id | department
##  Structure: General positive-definite, Log-Cholesky parametrization
##             StdDev       Corr  
## (Intercept) 2.952821e-04 (Intr)
## 1 | idTRUE  5.081642e-11 0     
## Residual    8.106883e+00       
## 
## Correlation Structure: AR(1)
##  Formula: ~1 | department 
##  Parameter estimate(s):
##       Phi 
## -0.302995 
## Number of Observations: 63
## Number of Groups: 4
anova(lme.pre.pd1, lme.pre.pd2)
##             Model df      AIC      BIC    logLik   Test  L.Ratio p-value
## lme.pre.pd1     1 10 424.6632 444.7366 -202.3316                        
## lme.pre.pd2     2 13 426.3469 452.4422 -200.1735 1 vs 2 4.316316  0.2293
# Rest of code

aov.pd.ls1 <- anova(lme.pre.pd1)
aov.pd.ls1
##             numDF denDF   F-value p-value
## (Intercept)     1    52 2649.0293  <.0001
## time            3    52  114.8167  <.0001
## group           1    52    0.7161  0.4013
## time:group      3    52    2.8845  0.0444
res.aov2 <- aov(score ~ group + time, data = my_data)
summary(res.aov2)
##             Df Sum Sq Mean Sq F value Pr(>F)    
## group        1      8       8   0.109  0.743    
## time         3  23465    7822 104.800 <2e-16 ***
## Residuals   58   4329      75                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 1 observation deleted due to missingness
res.aov3 <- aov(score ~ group + time + group:time, data = my_data)
summary(res.aov3)
##             Df Sum Sq Mean Sq F value Pr(>F)    
## group        1      8       8   0.120 0.7309    
## time         3  23465    7822 115.016 <2e-16 ***
## group:time   3    588     196   2.885 0.0438 *  
## Residuals   55   3740      68                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 1 observation deleted due to missingness
TukeyHSD(res.aov2)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = score ~ group + time, data = my_data)
## 
## $group
##                   diff       lwr     upr     p adj
## EcN-Control -0.7184476 -5.076465 3.63957 0.7425918
## 
## $time
##            diff        lwr       upr     p adj
## t2-t1  45.74020  37.527410 53.952987 0.0000000
## t3-t1  33.42145  25.208660 41.634237 0.0000000
## t4-t1  49.80270  41.589910 58.015487 0.0000000
## t3-t2 -12.31875 -20.397989 -4.239511 0.0009148
## t4-t2   4.06250  -4.016739 12.141739 0.5478712
## t4-t3  16.38125   8.302011 24.460489 0.0000087
#lmer (lme4)

fit1 <- lmer(score ~ group + time + (1 | department), REML=F, data = my_data)
## boundary (singular) fit: see ?isSingular
summary(fit1)
## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: score ~ group + time + (1 | department)
##    Data: my_data
## 
##      AIC      BIC   logLik deviance df.resid 
##    459.3    474.3   -222.6    445.3       56 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.2848 -0.6334 -0.2023  0.6662  2.7628 
## 
## Random effects:
##  Groups     Name        Variance Std.Dev.
##  department (Intercept)  0.00    0.000   
##  Residual               68.71    8.289   
## Number of obs: 63, groups:  department, 4
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)   21.561      2.352   9.167
## groupEcN      -1.759      2.090  -0.842
## timet2        45.775      2.980  15.361
## timet3        33.456      2.980  11.227
## timet4        49.837      2.980  16.724
## 
## Correlation of Fixed Effects:
##          (Intr) grpEcN timet2 timet3
## groupEcN -0.415                     
## timet2   -0.644 -0.023              
## timet3   -0.644 -0.023  0.516       
## timet4   -0.644 -0.023  0.516  0.516
## convergence code: 0
## boundary (singular) fit: see ?isSingular
drop1(fit1, test = "Chisq")
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## Single term deletions
## 
## Model:
## score ~ group + time + (1 | department)
##        npar    AIC     LRT Pr(Chi)    
## <none>      459.27                    
## group     1 457.98   0.705  0.4012    
## time      3 570.42 117.150  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
fit2 <- lmer(score ~ group + time + (1 + time | id), REML=F, data = my_data)
## boundary (singular) fit: see ?isSingular
summary(fit2)
## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: score ~ group + time + (1 + time | id)
##    Data: my_data
## 
##      AIC      BIC   logLik deviance df.resid 
##    474.2    508.5   -221.1    442.2       47 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.5995 -0.6195 -0.1262  0.7092  2.8514 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr             
##  id       (Intercept)  7.733   2.781                     
##           timet2       1.122   1.059    -1.00            
##           timet3       3.870   1.967    -1.00  1.00      
##           timet4       6.444   2.539     1.00 -1.00 -1.00
##  Residual             58.751   7.665                     
## Number of obs: 63, groups:  id, 8
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)   21.471      2.388   8.990
## groupEcN      -1.819      1.933  -0.941
## timet2        45.895      2.783  16.489
## timet3        33.576      2.844  11.804
## timet4        49.957      2.900  17.224
## 
## Correlation of Fixed Effects:
##          (Intr) grpEcN timet2 timet3
## groupEcN -0.376                     
## timet2   -0.637 -0.024              
## timet3   -0.670 -0.024  0.530       
## timet4   -0.431 -0.023  0.446  0.401
## convergence code: 0
## boundary (singular) fit: see ?isSingular
drop1(fit2, test = "Chisq")
## boundary (singular) fit: see ?isSingular
## boundary (singular) fit: see ?isSingular
## Single term deletions
## 
## Model:
## score ~ group + time + (1 + time | id)
##        npar    AIC    LRT   Pr(Chi)    
## <none>      474.18                     
## group     1 473.06  0.877    0.3491    
## time      3 501.07 32.887 3.403e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

14. R version and Packages used (with versions)

version
##                _                           
## platform       x86_64-w64-mingw32          
## arch           x86_64                      
## os             mingw32                     
## system         x86_64, mingw32             
## status                                     
## major          3                           
## minor          6.1                         
## year           2019                        
## month          07                          
## day            05                          
## svn rev        76782                       
## language       R                           
## version.string R version 3.6.1 (2019-07-05)
## nickname       Action of the Toes
sessionInfo()
## R version 3.6.1 (2019-07-05)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19043)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_Netherlands.1252  LC_CTYPE=English_Netherlands.1252   
## [3] LC_MONETARY=English_Netherlands.1252 LC_NUMERIC=C                        
## [5] LC_TIME=English_Netherlands.1252    
## 
## attached base packages:
##  [1] splines   parallel  stats4    stats     graphics  grDevices utils    
##  [8] datasets  methods   base     
## 
## other attached packages:
##  [1] calibrate_1.7.7             gamlss_5.1-7               
##  [3] gamlss.dist_5.1-7           gamlss.data_5.1-4          
##  [5] datarium_0.1.0              rstatix_0.6.0              
##  [7] forcats_0.5.0               stringr_1.4.0              
##  [9] readr_1.3.1                 tidyr_1.1.1                
## [11] tibble_3.0.3                tidyverse_1.3.0            
## [13] corrplot_0.84               metamicrobiomeR_1.1        
## [15] ggvegan_0.1-0               psych_2.0.7                
## [17] DESeq2_1.26.0               SummarizedExperiment_1.16.1
## [19] DelayedArray_0.12.3         BiocParallel_1.20.1        
## [21] matrixStats_0.56.0          Biobase_2.46.0             
## [23] GenomicRanges_1.38.0        GenomeInfoDb_1.22.1        
## [25] IRanges_2.20.2              S4Vectors_0.24.4           
## [27] BiocGenerics_0.32.0         DT_0.13                    
## [29] propr_4.2.6                 CoDaSeq_0.99.6             
## [31] car_3.0-8                   carData_3.0-4              
## [33] zCompositions_1.3.4         truncnorm_1.0-8            
## [35] NADA_1.6-1.1                compositions_2.0-0         
## [37] ALDEx2_1.18.0               RColorBrewer_1.1-2         
## [39] viridis_0.5.1               viridisLite_0.3.0          
## [41] purrr_0.3.4                 data.table_1.12.8          
## [43] multcompView_0.1-8          multcomp_1.4-13            
## [45] TH.data_1.0-10              MASS_7.3-51.4              
## [47] mvtnorm_1.1-1               emmeans_1.4.8              
## [49] picante_1.8.2               vegan_2.5-6                
## [51] permute_0.9-5               Hmisc_4.4-0                
## [53] Formula_1.2-3               survival_3.1-12            
## [55] lattice_0.20-38             decontam_1.6.0             
## [57] pheatmap_1.0.12             apeglm_1.8.0               
## [59] sciplot_1.2-0               lme4_1.1-23                
## [61] Matrix_1.2-17               nlme_3.1-140               
## [63] ggrepel_0.8.2               knitr_1.29                 
## [65] scales_1.1.1                reshape2_1.4.4             
## [67] ape_5.4                     dplyr_1.0.1                
## [69] plyr_1.8.6                  ggpubr_0.4.0               
## [71] microbiomeutilities_0.99.02 microbiome_2.1.26          
## [73] ggplot2_3.3.2               phyloseq_1.30.0            
## 
## loaded via a namespace (and not attached):
##   [1] tidyselect_1.1.0       RSQLite_2.2.0          AnnotationDbi_1.48.0  
##   [4] htmlwidgets_1.5.1      grid_3.6.1             Rtsne_0.15            
##   [7] munsell_0.5.0          codetools_0.2-16       statmod_1.4.34        
##  [10] withr_2.2.0            colorspace_1.4-1       rstudioapi_0.11       
##  [13] robustbase_0.93-6      bayesm_3.1-4           ggsignif_0.6.0        
##  [16] labeling_0.3           bbmle_1.0.23.1         GenomeInfoDbData_1.2.2
##  [19] mnormt_1.5-7           farver_2.0.3           bit64_4.0.2           
##  [22] rhdf5_2.30.1           coda_0.19-3            vctrs_0.3.2           
##  [25] generics_0.0.2         xfun_0.16              R6_2.4.1              
##  [28] locfit_1.5-9.4         bitops_1.0-6           assertthat_0.2.1      
##  [31] nnet_7.3-12            gtable_0.3.0           sandwich_2.5-1        
##  [34] rlang_0.4.7            genefilter_1.68.0      acepack_1.4.1         
##  [37] broom_0.7.0            checkmate_2.0.0        modelr_0.1.8          
##  [40] yaml_2.2.1             abind_1.4-5            backports_1.1.8       
##  [43] tensorA_0.36.1         tools_3.6.1            ellipsis_0.3.1        
##  [46] biomformat_1.14.0      Rcpp_1.0.5             base64enc_0.1-3       
##  [49] zlibbioc_1.32.0        RCurl_1.98-1.2         rpart_4.1-15          
##  [52] zoo_1.8-8              haven_2.3.1            cluster_2.1.0         
##  [55] fs_1.4.1               magrittr_1.5           openxlsx_4.1.5        
##  [58] reprex_0.3.0           hms_0.5.3              evaluate_0.14         
##  [61] xtable_1.8-4           XML_3.99-0.3           rio_0.5.16            
##  [64] emdbook_1.3.12         jpeg_0.1-8.1           readxl_1.3.1          
##  [67] gridExtra_2.3          compiler_3.6.1         bdsmatrix_1.3-4       
##  [70] crayon_1.3.4           minqa_1.2.4            htmltools_0.5.0       
##  [73] mgcv_1.8-28            geneplotter_1.64.0     lubridate_1.7.9       
##  [76] DBI_1.1.0              dbplyr_1.4.4           boot_1.3-22           
##  [79] ade4_1.7-15            cli_2.0.2              gdata_2.18.0          
##  [82] igraph_1.2.5           pkgconfig_2.0.3        numDeriv_2016.8-1.1   
##  [85] foreign_0.8-71         xml2_1.3.2             foreach_1.5.0         
##  [88] annotate_1.64.0        multtest_2.42.0        XVector_0.26.0        
##  [91] estimability_1.3       rvest_0.3.6            digest_0.6.25         
##  [94] Biostrings_2.54.0      rmarkdown_2.3          cellranger_1.1.0      
##  [97] htmlTable_2.0.1        curl_4.3               gtools_3.8.2          
## [100] nloptr_1.2.2.1         lifecycle_0.2.0        jsonlite_1.6.1        
## [103] Rhdf5lib_1.8.0         fansi_0.4.1            pillar_1.4.6          
## [106] httr_1.4.2             DEoptimR_1.0-8         glue_1.4.1            
## [109] zip_2.0.4              png_0.1-7              iterators_1.0.12      
## [112] bit_4.0.4              stringi_1.4.6          blob_1.2.1            
## [115] latticeExtra_0.6-29    memoise_1.1.0