


@ 352,7 +352,7 @@ kw_no2_post < lapply(





# QUESTION: How does the significance change with another p.adjust.method? What is your overall interpretation of the results? 














### Principal component analysis #### 




### Principal component analysis (PCA) #### 









# The math behind PCA and RDA is very similar > the difference is that RDA is constrained, 




# while PCA is not. So we can use the same R function, but without providing any predictor variables. 



@ 365,12 +365,17 @@ kw_no2_post < lapply(





# data with a linear correlation structure. 




env_pca < rda(meta[, 7:11], scale = T) 









# This unconstrained ordination object can be found in $CA of the env_pca object. 




# To explore this object further, use str() and the help function. 




# Look at contribution of all PCs (eigenvalues) 




barplot(env_pca$CA$eig/sum(env_pca$CA$eig)) 









# Interpretation of PCA: https://sites.google.com/site/mb3gustame/indirectgradientanalysis/pca 




# Scaling 2: correlation plot 









# I am jumping ahead here already to the plotting workshop... 




# save coordinates of default plot as speparate R object 




env_pca_scaling2 < plot(env_pca) 




env_pca_scaling2 < plot(env_pca, scaling = 2) 









# start with empty plot 




# this will define the range for x and y axis automatically 



@ 428,7 +433,7 @@ legend(





title = "Salinity" 




) 




legend( 




"bottomright", 




"topleft", 




legend = levels(meta$day), 




col = day.color, 




pch = 15, 



@ 440,7 +445,7 @@ legend(










### Hierarchical clustering #### 









# Calcualte BrayCurtis dissimilarities 




# Calculate BrayCurtis dissimilarities 




bc_pro < vegdist(t(otu_pro_rel)) 









# COMMENT: It is very important that BrayCurtis dissimilarities are calculated based on the percentages 



@ 452,7 +457,8 @@ bc_pro_clust < hclust(bc_pro, method = "average")





bc_pro_clust < hclust(bc_pro, method = "single") 









# Determine optimal linkage algorithm 




cor(bc_pro, cophenetic(bc_pro_clust)) 




stats::cor(bc_pro, cophenetic(bc_pro_clust)) 




plot(bc_pro, cophenetic(bc_pro_clust)) 




# topology of average linkage cluster diagram has highest correlation with input dissimilarity matrix 









# Quick and dirty plot 



@ 465,7 +471,7 @@ rect.hclust(bc_pro_clust, h = 0.5)





bc_pro_groups < cutree(bc_pro_clust, h = 0.5) 














### Principal coordinate analysis #### 




### Principal coordinate analysis (PCoA) #### 









# In R, the easiest way to calculate a PCoA is to use the betadisper function. 




# This function also assesses withingroup heterogeneity. 



@ 476,10 +482,14 @@ betadisper_pro < betadisper(





meta$salinity, 




sqrt.dist = T 




) 




str(betadisper_pro) 









# Show PCoA 




plot(betadisper_pro) 









# Extract PCoA object for different plotting layout 




View(betadisper_pro$vectors) 









# Get eigenvalues 




barplot(betadisper_pro$eig/sum(betadisper_pro$eig) * 100, las = 2, cex.names = 0.5, ylab = "% variation") 








@ 489,7 +499,7 @@ barplot(betadisper_pro$eig/sum(betadisper_pro$eig) * 100, las = 2, cex.names = 0





boxplot(betadisper_pro) 














### Nonmetric multidimensional scaling #### 




### Nonmetric multidimensional scaling (NMDS) #### 









# Calculate NMDS based on BrayCurtis dissimilarity (default) 




# It is possible to supply the matrix with the proportions directly, or already the 



@ 520,6 +530,13 @@ points(





# QUESTION: How well represented are the original BrayCurtis dissimilarities? 




# The original BrayCurtis dissimilarities should always be considered in the interpretation. 









# Fit additional variable onto the ordination 




envfit_pro < envfit(nmds_pro, meta[, 7:11]) 




plot(envfit_pro) 









# Check that fitted variables behave linearly before interpreting envfit 




ordisurf(nmds_pro, meta$sio2_uM) 














### PERMANOVA #### 








@ 534,7 +551,7 @@ otu_pro_perm < otu_pro_perm[rowSums(otu_pro_perm) > 0, ]










perm_ctrl < how( 




within = Within(type = "free"), 




plots = Plots(strata = paste(meta_stats$salinity, meta_stats$replicate, sep = "_"), type = "free"), 




plots = Plots(strata = meta_stats$replicate, type = "free"), 




nperm = 999 




) 








@ 549,7 +566,7 @@ permanova_out < adonis2(t(otu_pro_perm) ~ day * salinity, data = meta_stats, sq





### ANOSIM #### 









# Similar to betadisper, ANOSIM is commonly run on the interaction, but we will use salinity 




# as groupsing factor here for demonstration purposes. 




# as grouping factor here for demonstration purposes. 




# Similar to KruskalWallis, ANOSIM is unifactorial. 









# Calculate ANOSIM based on BrayCurtis dissimilarity 



@ 560,12 +577,44 @@ anosim(t(otu_pro_perm), meta_stats$salinity)





# Calculate pairwise ANOSIM statistics 




# Download function from: https://raw.githubusercontent.com/chassenr/Tutorials/master/R_roundtable_sequence_analysis/anosimPosthoc.R 









source("C:/Users/chassenrueck/Documents/Repos/Tutorials/R_roundtable_sequence_analysis/anosimPosthoc.R") 




ANOSIMposthoc=function(M,E,distance="bray", padj="fdr"){ 




E=droplevels(E) 




Mlist=list() 




for(i in 1:length(levels(E))){ 




Mlist[[i]]=M[E==levels(E)[i],] 




} 









Elist=list() 




for(i in 1:length(levels(E))){ 




Elist[[i]]=as.numeric(E[E==levels(E)[i]]) 




} 









result=list(anosimR=matrix(NA,length(levels(E)),length(levels(E))), 




anosimP=matrix(NA,length(levels(E)),length(levels(E))), 




anosimPadj=matrix(NA,length(levels(E)),length(levels(E)))) 




colnames(result$anosimR)=colnames(result$anosimP)=levels(E) 




rownames(result$anosimR)=rownames(result$anosimP)=levels(E) 




for(i in 1:(length(levels(E))1)){ 




for(j in (i+1):length(levels(E))){ 




temp=anosim(rbind(Mlist[[i]],Mlist[[j]]),c(Elist[[i]],Elist[[j]]),distance=distance) 




result$anosimR[j,i]=temp$statistic 




result$anosimP[j,i]=temp$signif 




} 




} 




result$anosimPadj=matrix(p.adjust(as.vector(result$anosimP),method=padj, 




n=length(which(!is.na(as.vector(result$anosimP))))), 




length(levels(E)),length(levels(E))) 




colnames(result$anosimPadj)=levels(E) 




rownames(result$anosimPadj)=levels(E) 




return(result) 




} 









ANOSIMposthoc(t(otu_pro_perm), meta_stats$salinity) 









# I recommend to focus on the ANOSIM R in the interpretation. 




# R < 0.3: weak separation 




# R < 0.3: weak to no separation 




# R > 0.7: strong separation 




# see also https://pubmed.ncbi.nlm.nih.gov/17892477/ 









# QUESTION: What is the difference in the interpretation between PERMANOVA and ANOSIM? 




# Can you imagine cases, where there is disagreement between the result of these 2 methods? 



@ 579,12 +628,25 @@ ANOSIMposthoc(t(otu_pro_perm), meta_stats$salinity)










simper_pro < simper(t(otu_pro_perm), meta_stats$salinity) 









# Let's look at the contribution of the most important taxa according to simper 




barplot(simper_pro$control_psu12$cusum[1:100]) 









# Which taxa contribute cumulatively to 50% of the observed differences between groups? 




simper_pro$control_psu12$species[simper_pro$control_psu12$ord[1:which(simper_pro$control_psu12$cusum >= 0.5)[1]]] 




tmp < simper_pro$control_psu12$species[simper_pro$control_psu12$ord[1:which(simper_pro$control_psu12$cusum >= 0.5)[1]]] 









barplot( 




otu_pro_perm[tmp, meta_stats$salinity %in% c("control", "psu12")], 




col = rainbow(length(tmp)), 




las = 2, 




cex.names = 0.5 




) 














### Redundancy Analysis #### 









# WARNING: Be sure that the relationships that you are investigating are actually linear, 




# when applying this method. 









# Inspect collinearity between predictor variables. 




# As RDA will assume linear combinations of the predictors, we will use pearson correlations. 




# As a rule of thumb, anything with an r > 0.8 will be highly problematic. 



@ 594,7 +656,7 @@ cor(meta_stats[, 7:11])










# Our response variable (community matrix) is compositional. 




# We will apply a clrtransformation to coerce it into something resembling normality... 




# Since the otu count matrix is sparce, rare otus have to be exlcuded 




# Since the otu count matrix is sparce, rare otus have to be excluded 









otu_pro_sub < otu_pro[rownames(otu_pro_perm), colnames(otu_pro_perm)] 




otu_pro_filt < otu_pro_sub[apply(otu_pro_perm, 1, function(x) sum(x >= 0.1) >= length(x) * 0.1), ] 



@ 659,7 +721,7 @@ plot(rda_otu_varpart)





### Differential abundance #### 









# Whether you are working with omics data or any other multivariate response, 




# usually the aim is not just to see if there is an overal change, but which variables 




# usually the aim is not just to see if there is an overall change, but which variables 




# (taxa) are responsible. 









# For compositional sequence data, there is a package called ALDEx2, which accounts for compositionality 



@ 676,6 +738,10 @@ otu_pro_d21 < otu_pro[, rownames(meta_d21)]





otu_pro_d21_filt < otu_pro_d21[apply(prop.table(otu_pro_d21, 2) * 100, 1, function(x) sum(x >= 0.1) >= 3), ] 




dim(otu_pro_d21_filt) 









# Removal of sequence 




# I would be careful witha filter that is removing too much of your data (i.e. more than 3050%) 




summary(colSums(otu_pro_d21_filt)/colSums(otu_pro_d21)) 









# Check that filtering did not change beta diversity patterns 




plot( 




vegdist(t(prop.table(otu_pro_d21, 2) * 100)), 



@ 701,13 +767,19 @@ sum(aldex_out$glm.eBH <= 0.05)





summary(aldex_out$kw.eBH) 




hist(aldex_out$kw.eBH, breaks = 50) 




abline(v = 0.05) 




sum(aldex_out$kw.eBH <= 0.05) 




sum(aldex_out$kw.eBH <= 0.1) 









# How many ASVs detected as differentially enriched? 




# How many ASVs are detected as differentially enriched? 




# Due to the low sample size, it is possible that pvalues are not reliable. 




# Especially for nonparametric tests, it may not be mathematically possible to obtain 




# pvalues low enough to not be above the significance threshold after correction. 









# I recommend to use the pvalues obtained here, only as a means to find a starting point 




# for the interpretation, i.e. to choose some taxa (out of hundreds or thousands) to begin 




# describing patterns between experimental conditions. If those patterns do not make sense, i.e. 




# they are not supported by independent analyses or previous literature, I would use them only 




# to postulate new hypotheses and not to make final conclusions about an ecological phenomenon. 









# Compare parametric and nonparametric results 




plot(aldex_out$kw.ep, aldex_out$glm.eBH) 




abline(v = 0.05, h = 0.05) 



@ 729,7 +801,7 @@ dim(otu_pro_diff_filt)





# This is actually the first time, that we would like to get more than just a sequence number for the taxa. 




tax_pro_diff_filt < tax_pro[rownames(otu_pro_diff_filt), ] 









# While it is usually recommended to build a heatmap from sratch using basic plotting elements, 




# While it is usually recommended to build a heatmap from scratch using basic plotting elements, 




# we will use a shortcut here (provided by the gplots package). 




# For better visibility of small changes, show squareroot transformed proportions. 




heatmap.2( 



@ 935,7 +1007,6 @@ View(tax_pro[cluster.pick.rep[[i]], ])





# Correlation of eigengenes with environmental parameters 




cor_me_env < cor(meta_stats[, 7:11], cluster.ME$eigengenes) 









# There seems to be something going on between po4 and M2+4+10, no3 and M5, no2 and M6, nh4 and M9... 




# However, who says that the relationship should be expected to be linear? 




# Also, we did not check assumptions... 




# In general, such an analysis may be more suitable for environmental samples... 



