Analaysis of Metagenomics data from Salter 2014

Reagent and laboratory contamination can critically impact sequence-based microbiome analyses, Salter et al., 2014, https://doi.org/10.1186/s12915-014-0087-z.

Load needed packages:

#load packages
packages <- c("decontam", "ggplot2", "reshape2", "phyloseq", "dada2", "gridExtra", "vegan", "RColorBrewer", "dplyr", "knitr", "cowplot", "openxlsx", "circlize")
sapply(packages, require, character.only = TRUE)

##     decontam      ggplot2     reshape2     phyloseq        dada2 
##         TRUE         TRUE         TRUE         TRUE         TRUE 
##    gridExtra        vegan RColorBrewer        dplyr        knitr 
##         TRUE         TRUE         TRUE         TRUE         TRUE 
##      cowplot     openxlsx     circlize 
##         TRUE        FALSE         TRUE

#settings
theme_set(theme_bw())
options(stringsAsFactors = FALSE)
path.ampli <- "~/DecontamManuscript/Analyses/Salter16S" # CHANGE ME
path.meta <- "~/DecontamManuscript/Analyses/SalterMeta" # CHANGE ME
path.out <- "~/DecontamManuscript/Analyses/Figures" # CHANGE ME

#color palettes
cbbPalette <- c("#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7","#000000")
methodPalette <- c(Prevalence="turquoise3", Frequency="orangered1", Combined="darkorchid4")
prevalencePalette <- c("2" = "#edf8e9", "3-5" = "#bae4b3", "6-10" = "#74c476", "11+" = "#238b45")

Process 16S Amplicon Data

This section analyzes the Salter et al. dilution-series amplicon sequencing data. Data availability is described in that paper: “Samples for the S. bongori culture 16S rRNA gene profiling … are deposited under ENA project accession EMBL: ERP006737; sample details and individual accession numbers are detailed in Additional file 1: Table S1a.”

Once downloaded, the amplicon sequencing data was processed using the dada2 R package, producing a table of exact amplicon sequence variants (ASV). Those processed files are included as part of this reproducible analysis in the Salter16S directory. In addition, the DADA2 processing steps are included here as well for completeness, but commented out:

Read in the sample metadata:

df.ampli <- read.csv(file.path(path.ampli, "Salter1.csv"), header=TRUE, stringsAsFactors = FALSE)
rownames(df.ampli) <- df.ampli$Run.accession

Process the forward reads with DADA2 (must be downloaded from ENA):

#path.fastq <- "~/Salter" # CHANGE ME, to directory containing the downloaded fastq.gz files
#fnFs <- list.files(path.fastq, pattern="_1.fastq.gz", full.names=TRUE)
#names(fnFs) <- sapply(strsplit(basename(fnFs), "_"), `[`, 1) # Name by Run accession
#fastqFs <- fnFs[df.ampli$Run.accession]
#fwdFs <- file.path(path.fastq, "FWD", basename(fastqFs))
#names(fwdFs) <- names(fastqFs)
#outF <- filterAndTrim(fastqFs, fwdFs, rm.phix=TRUE, truncLen=240, maxEE=3, multithread=TRUE)
#drp <- derepFastq(fwdFs)
#err <- learnErrors(drp, multithread=TRUE)
#dd <- dada(drp, err=err, pool=TRUE, multithread=TRUE) # pool=TRUE to share power with the low count samples
#sta <- makeSequenceTable(dd)
#st <- removeBimeraDenovo(sta, method="pooled", verbose=TRUE)
#tax <- assignTaxonomy(st, "~/tax/silva_nr_v128_train_set.fa.gz", multithread=TRUE)
#saveRDS(st, file.path(path.ampli, "salter16S_st.rds"))
#saveRDS(tax, file.path(path.ampli, "salter16S_tax.rds"))

The RDS objects storing the output of the DADA2 processing are included as part of this reproducible analysis in the Salter16S directory.

Read in the DADA2-processed ASV tables and taxonomic assignments:

st.ampli <- readRDS(file.path(path.ampli, "salter16S_st.rds"))
tax.ampli <- readRDS(file.path(path.ampli, "salter16S_tax.rds"))
ft.ampli <- sweep(st.ampli, 1, rowSums(st.ampli), "/")
df.ampli$Dilution.number[df.ampli$Dilution.number == "0 (original culture)"] <- "0"
df.ampli$Dilution.number[df.ampli$Dilution.number == "Negative control"] <- "Neg"
conc.dict <- c("0"=1e3, "0 (original culture)"=1e3, "1"=1e2, "2"=1e1, "3"=1, "4"=1, "5"=1, "Neg"=1)
df.ampli$conc <- conc.dict[df.ampli$Dilution.number]
identical(rownames(df.ampli), rownames(st.ampli)) # TRUE

## [1] TRUE

ps.ampli <- phyloseq(otu_table(st.ampli, taxa_are_rows=FALSE), tax_table(tax.ampli), sample_data(df.ampli))

Plot read numbers for each samples:

p.depth.ampli <- ggplot(data=df.ampli, aes(x=Dilution.number, y=Post.processing.read.count, color=Dilution.number)) + 
                 geom_point() + facet_grid(PCR.cycles~Processing.Institute) + 
                 theme_bw() + guides(color=FALSE)
print(p.depth.ampli)

Total read numbers drop off with dilution for 20 PCR cycles, but 40 PCR cycles produces significant numbers of reads even in the Negative control.

Identify contaminants using the frequency method, both pooling all samples and when each sequencing center is identified as a batch:

ampli.min <- isContaminant(ps.ampli, method="frequency", conc="conc", batch="Processing.Institute", batch.combine="minimum", normalize=TRUE)
ampli.pool <- isContaminant(ps.ampli, method="frequency", conc="conc", normalize=TRUE)

Plot the removal of contaminants as a function of the classification threshold:

head(unname(tax.ampli)) # The top 3 ASVs are the true ASVs from the S. bongori monoculture

##      [,1]       [,2]             [,3]                  [,4]               
## [1,] "Bacteria" "Proteobacteria" "Gammaproteobacteria" "Enterobacteriales"
## [2,] "Bacteria" "Proteobacteria" "Gammaproteobacteria" "Enterobacteriales"
## [3,] "Bacteria" "Proteobacteria" "Gammaproteobacteria" "Enterobacteriales"
## [4,] "Bacteria" "Actinobacteria" "Actinobacteria"      "Micrococcales"    
## [5,] "Bacteria" "Proteobacteria" "Alphaproteobacteria" "Rhizobiales"      
## [6,] "Bacteria" "Proteobacteria" "Gammaproteobacteria" "Enterobacteriales"
##      [,5]                 [,6]                  
## [1,] "Enterobacteriaceae" "Salmonella"          
## [2,] "Enterobacteriaceae" "Salmonella"          
## [3,] "Enterobacteriaceae" "Salmonella"          
## [4,] "Microbacteriaceae"  "Microbacterium"      
## [5,] "Brucellaceae"       "Ochrobactrum"        
## [6,] "Enterobacteriaceae" "Escherichia/Shigella"

tot.ampli <- sum(st.ampli[,c(-1,-2,-3)])

threshs <- c(0, 0.001, 0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9)
fracdf.ampli <- data.frame(pooled=sapply(threshs, function(t) sum(st.ampli[,ampli.pool$p<t], na.rm=TRUE)/tot.ampli),
                     batched=sapply(threshs, function(t) sum(st.ampli[,ampli.min$p<t], na.rm=TRUE)/tot.ampli),
                     threshold=threshs)
svdf.ampli <- data.frame(pooled=sapply(threshs, function(t) sum(ampli.pool$p<t, na.rm=TRUE)/(ncol(st.ampli)-3)),
                     batched=sapply(threshs, function(t) sum(ampli.min$p<t, na.rm=TRUE)/(ncol(st.ampli)-3)),
                     threshold=threshs)
mfrac.ampli <- melt(fracdf.ampli, id.vars="threshold", value.name="Sensitivity", variable.name="Method")
msv.ampli <- melt(svdf.ampli, id.vars="threshold", value.name="Sensitivity", variable.name="Method")
df.sensi.ampli <- rbind(cbind(mfrac.ampli, Measure="Reads", Technology="Amplicon"),
                        cbind(msv.ampli, Measure="ASVs", Technology="Amplicon"))
# Calculate maximum possible sensitivity given can't ID singe-sample ASVs
max.sensi.rd.ampli <- (sum(st.ampli[,colSums(st.ampli>0)>=2])-sum(st.ampli[,c(1,2,3)]))/tot.ampli
max.sensi.asv.ampli <- (sum(colSums(st.ampli>0)>=2)-3)/(ncol(st.ampli)-3)
max.sensi.ampli <- data.frame(Measure=c("Reads", "ASVs"), 
                              Max.Sensitivity=c(max.sensi.rd.ampli, max.sensi.asv.ampli), 
                              Technology="Amplicon")
# Plot 2-panel
p.sensi.ampli <- ggplot(data=df.sensi.ampli, aes(x=threshold, y=Sensitivity, linetype=Method)) + 
  geom_line() + geom_point() + xlim(0, 0.5) + ylim(0,1) +
  theme_bw() + xlab("Classification Threshold (P*)") + ylab("Sensitivity") +
  facet_wrap(~Measure, ncol=1) +
  geom_hline(data=max.sensi.ampli,
             aes(yintercept=Max.Sensitivity), color=prevalencePalette[[4]])
p.sensi.ampli

## Warning: Removed 8 rows containing missing values (geom_path).

## Warning: Removed 16 rows containing missing values (geom_point).

# Save as 6x6in PDF

The green line indicates the maximum possible sensitivity accounting for single-sample features (which decontam cannot classify).

Process Metagenomics Data

This section analyzes the metagenomics sequencing data: “For metagenomic sequencing…Data are deposited under ENA project accession EMBL: ERP006808. Sample details and individual accession numbers are provided in Additional file 1: Table S1b.”

Once downloaded, the metagenomics data was processed using Kraken through the Galaxy online service, selecting the Bacteria database, to create taxonomic profiles, which were then downloaded as .taxonomy files. Those processed files are included as part of this reproducible analysis in the SalterMeta directory.

Read in the sample metadata:

df.meta <- read.csv(file.path(path.meta, "metameta.csv"), stringsAsFactors = FALSE)
rownames(df.meta) <- df.meta$Sample.Name
df.meta$Kit <- sapply(strsplit(df.meta$Sample.Name, "_"), `[`, 1)
# Drop data from PSP as that kit produced almost no reads
df.meta <- df.meta[df.meta$Kit %in% c("CAMBIO","MP", "QIAGEN"),] 
df.meta$Dilution.number[df.meta$Dilution.number == "0 (original culture)"] <- "0"
# Define approximate quantitative concentrations (from Figre 2 in Satler et al.)
conc.dict <- c("0"=1e3, "0 (original culture)"=1e3, "1"=1e2, "2"=1e1, "3"=1, "4"=1, "5"=1, "Negative control"=1)
df.meta$conc <- conc.dict[df.meta$Dilution.number]

Read in the Kraken-assigned taxonomy profiles (those files should be in the directory path.meta):

get.tax <- function(err, level, tax.path=path.meta) {
  foo <- read.table(gzfile(file.path(tax.path, paste0(err, ".taxonomy.gz"))), header=FALSE, sep="\t", stringsAsFactors = FALSE)
  colnames(foo) <- c("Name", "taxID", "root", "superkingdom", "kingdom", "subkingdom", "superphylum", "phylum", "subphylum",
                   "superclass", "class", "subclass", "superorder", "order", "suborder", "superfamily", "family", "subfamily",
                   "tribe", "subtribe", "genus", "subgenus", "species", "subspecies", "blank")
  tab <- table(foo[,level])
  unq <- as.vector(tab)
  names(unq) <- names(tab)
  unq
}
unqs <- lapply(df.meta$Run.accession, get.tax, level="genus")
names(unqs) <- df.meta$Sample.Name

Convert the data into a feature table, and then a phyloseq object:

st.meta <- makeSequenceTable(unqs) # Creates sample-by-genus feature table

## The sequences being tabled vary in length.

st.meta <- st.meta[,colnames(st.meta)!="n"] # Remove unclassified
ft.meta <- sweep(st.meta, 1, rowSums(st.meta), "/") # Convert to frequencies
ps.meta <- phyloseq(otu_table(st.meta, taxa_are_rows=FALSE), sample_data(df.meta))

Plot the overall read numbers:

df.meta$Post.processing.read.count <- rowSums(st.meta)
p.depth.meta <- ggplot(data=df.meta, aes(x=Dilution.number, y=Post.processing.read.count, color=Dilution.number)) + 
                geom_point() + facet_grid(~Kit) + theme_bw()
print(p.depth.meta)

Total read numbers drop off with dilution.

Identify contaminants using the frequency method, both pooling allsamples and when each sequencing kit is identified as a batch:

meta.min <- isContaminant(ps.meta, method="frequency", conc="conc", batch="Kit", batch.combine="minimum", normalize=TRUE)
meta.pool <- isContaminant(ps.meta, method="frequency", conc="conc", normalize=TRUE)

Plot the removal of contaminants as a function of the classification threshold:

# Determine the total number of contaminant (i.e. non-Salmonealla) reads in each sample
tot.meta <- sum(st.meta[,!colnames(st.meta) %in% "Salmonella"])
threshs <- c(0, 0.001, 0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9)

fracdf.meta <- data.frame(pooled=sapply(threshs, function(t) sum(st.meta[,meta.pool$p<t], na.rm=TRUE)/tot.meta),
                          batched=sapply(threshs, function(t) sum(st.meta[,meta.min$p<t], na.rm=TRUE)/tot.meta),
                          threshold=threshs)
svdf.meta <- data.frame(pooled=sapply(threshs, function(t) sum(meta.pool$p<t, na.rm=TRUE)/(ncol(st.meta)-1)),
                        batched=sapply(threshs, function(t) sum(meta.min$p<t, na.rm=TRUE)/(ncol(st.meta)-1)),
                        threshold=threshs)
mfrac.meta <- melt(fracdf.meta, id.vars="threshold", value.name="Sensitivity", variable.name="Method")
msv.meta <- melt(svdf.meta, id.vars="threshold", value.name="Sensitivity", variable.name="Method")
df.sensi.meta <- rbind(cbind(mfrac.meta, Measure="Reads", Technology="Shotgun"),
                       cbind(msv.meta, Measure="Genera", Technology="Shotgun"))
# Calculate maximum possible sensitivity given can't ID singe-sample ASVs
max.sensi.rd.meta <- (sum(st.meta[,colSums(st.meta>0)>=2 & !colnames(st.meta) %in% "Salmonella"]))/tot.meta
max.sensi.genus.meta <- (sum(colSums(st.meta>0)>=2)-1)/(ncol(st.meta)-1)
max.sensi.meta <- data.frame(Measure=c("Reads", "Genera"), 
                             Max.Sensitivity=c(max.sensi.rd.meta, max.sensi.genus.meta), 
                             Technology="Shotgun")
# Plot 2-panel
p.sensi.meta <- ggplot(data=df.sensi.meta, aes(x=threshold, y=Sensitivity, linetype=Method)) + 
  geom_line() + geom_point() + xlim(0, 0.5) + ylim(0,1) +
  theme_bw() + xlab("Classification Threshold (P*)") + ylab("Sensitivity") +
  facet_wrap(~Measure, ncol=1) +
  geom_hline(data=max.sensi.meta,
             aes(yintercept=Max.Sensitivity), color=prevalencePalette[[4]])
p.sensi.meta

## Warning: Removed 8 rows containing missing values (geom_path).

## Warning: Removed 16 rows containing missing values (geom_point).

# Save as 6x6in PDF

Looks good. Batched (minimum) still the way to go.

Make 4-Panel Sensitivity Figure

df.sensi <- rbind(df.sensi.ampli, df.sensi.meta)
## table(df.sensi$Method, df.sensi$Measure, df.sensi$Technology)
df.sensi$Measure[df.sensi$Measure %in% c("ASVs", "Genera")] <- "Variants" # Generic term covering both technologies
df.sensi$Measure <- factor(df.sensi$Measure, levels=c("Reads", "Variants"))
## table(df.sensi$Method, df.sensi$Measure, df.sensi$Technology)
max.sensi <- rbind(max.sensi.ampli, max.sensi.meta)
max.sensi$Measure[max.sensi$Measure %in% c("ASVs", "Genera")] <- "Variants" # Generic term covering both technologies
p.sensi <- ggplot(data=df.sensi, aes(x=threshold, y=Sensitivity, linetype=Method)) + 
  geom_line() + geom_point() + xlim(0, 0.5) + ylim(0,1) +
  theme_bw() + xlab("Classification Threshold (P*)") + ylab("Sensitivity") +
  facet_grid(Measure~Technology) +
  geom_hline(data=max.sensi,
             aes(yintercept=Max.Sensitivity), color=prevalencePalette[[4]])
p.sensi

## Warning: Removed 8 rows containing missing values (geom_path).

## Warning: Removed 32 rows containing missing values (geom_point).

ggsave(file.path(path.out, "Salter_Fig4_Sensitivity.pdf"), p.sensi, width=7, height=4, units="in", useDingbats=FALSE)

## Warning: Removed 8 rows containing missing values (geom_path).

## Warning: Removed 32 rows containing missing values (geom_point).

ggsave(file.path(path.out, "Salter_Fig4_Sensitivity.png"), p.sensi, width=7, height=4, units="in")

## Warning: Removed 8 rows containing missing values (geom_path).

## Warning: Removed 32 rows containing missing values (geom_point).

Response to Reviewers: Check the ranked scores of the true-positive variants

data.frame(Score=ampli.pool$p,Rank=rank(1-ampli.pool$p))[1:10,] # First 3 are the true S. bongori ASVs

##           Score Rank
## 1  9.999998e-01    1
## 2  9.999991e-01    3
## 3  9.999992e-01    2
## 4  9.744164e-05  452
## 5  1.359199e-03  450
## 6  7.448283e-02  412
## 7  4.353417e-02  423
## 8  2.545524e-03  447
## 9  1.041278e-01  404
## 10 4.463331e-03  444

data.frame(Score=ampli.min$p,Rank=rank(1-ampli.min$p))[1:10,] # First 3 are the true S. bongori ASVs

##           Score Rank
## 1  0.9886370117    6
## 2  0.9916832486    4
## 3  0.9896982167    5
## 4  0.0001176438  399
## 5  0.0007196191  396
## 6  0.0001144951  400
## 7  0.0023548441  389
## 8  0.0017220808  392
## 9  0.0001806086  398
## 10 0.0111738559  375

data.frame(Score=meta.pool$p,Rank=rank(1-meta.pool$p))[1:10,] # First 1 is the Salmonella genus

##          Score Rank
## 1  0.999999564    1
## 2  0.023452138  237
## 3  0.002788117  247
## 4  0.011328165  244
## 5  0.002793776  246
## 6  0.021293582  239
## 7  0.173613969  158
## 8  0.013558751  242
## 9  0.028997265  233
## 10 0.031287643  232

data.frame(Score=meta.min$p,Rank=rank(1-meta.min$p))[1:10,] # First 1 is the Salmonella genus

##          Score Rank
## 1  0.986871795    1
## 2  0.021265773  192
## 3  0.002428407  200
## 4  0.030302059  180
## 5  0.047717709  165
## 6  0.076229609  153
## 7  0.034051516  178
## 8  0.039414814  174
## 9  0.005371086  199
## 10 0.007237044  198

The true variants are the top variants in every case (except 4/5/6 in ampli/batch), and the scores assigned to those variants are very high, above 0.98 in all cases.

Make 16S and Shotgun Ordination Figure(s)

Create ordinations of the samples without removing contaminants, with default contaminant removal (P*=0.1) and with aggressive removal (P*=0.5).

Make plot for the 16S data:

# Create sequence table with contaminants with P<0.1 removed
st01 <- st.ampli
st01[,ampli.min$p<0.1] <- 0
# Create sequence table with contaminants with P<0.5 removed
st05 <- st.ampli
st05[,ampli.min$p<0.5] <- 0
# Join together with uncorrected sequence table into a phyloseq object
ps00 <- phyloseq(otu_table(st.ampli, taxa_are_rows=FALSE), sample_data(df.ampli))
ps01 <- phyloseq(otu_table(st01, taxa_are_rows=FALSE), sample_data(df.ampli))
ps05 <- phyloseq(otu_table(st05, taxa_are_rows=FALSE), sample_data(df.ampli))
sample_names(ps01) <- paste0(sample_names(ps00), "01")
sample_names(ps05) <- paste0(sample_names(ps00), "05")
sample_data(ps00)$Contam <- "0.0"
sample_data(ps01)$Contam <- "0.1"
sample_data(ps05)$Contam <- "0.5"
psa <- merge_phyloseq(ps00, ps01, ps05)
#psa <- prune_taxa(colSums(st.meta>0)>1,psa)
psa <- prune_samples(sample_sums(psa) > 0, psa)
psa <- transform_sample_counts(psa, function(otu) otu/sum(otu))
psa <- subset_samples(psa, PCR.cycles==40 & Dilution.number != "Neg")
dbray <- vegdist(as(otu_table(psa), "matrix"), "bray")
ord.ampli <- ordinate(psa, distance=dbray, method="MDS")
p.ord.ampli <- plot_ordination(psa, ord.ampli, color="Processing.Institute") +
  facet_grid(Contam~Dilution.number) + 
  theme_bw() + theme(axis.text=element_blank()) +
  scale_color_manual(values=c("ICL"="#CC00CC", "UB"="#009999", "WTSI"="#9FEE00")) +
  theme(panel.grid=element_blank(), axis.ticks=element_blank(),
        strip.text=element_text(size=8)) +
#  ggtitle("16S rRNA Gene Sequencing") +
  guides(color=FALSE) # Will add back in by hand to final figure
p.ord.ampli

ggsave(file.path(path.out, "Salter_Ordination_16S.pdf"), p.ord.ampli, width=3.5, height=2, units="in", useDingbats=FALSE)

Make plot for the metagenomics data:

# Create sequence table with contaminants with P<0.1 removed
st01 <- st.meta
st01[,meta.min$p<0.1] <- 0
# Create sequence table with contaminants with P<0.5 removed
st05 <- st.meta
st05[,meta.min$p<0.5] <- 0
# Join together with uncorrected sequence table into a phyloseq object
ps00 <- phyloseq(otu_table(st.meta, taxa_are_rows=FALSE), sample_data(df.meta))
ps01 <- phyloseq(otu_table(st01, taxa_are_rows=FALSE), sample_data(df.meta))
ps05 <- phyloseq(otu_table(st05, taxa_are_rows=FALSE), sample_data(df.meta))
sample_names(ps01) <- paste0(sample_names(ps01), "01")
sample_names(ps05) <- paste0(sample_names(ps01), "05")
sample_data(ps00)$Contam <- "0.0"
sample_data(ps01)$Contam <- "0.1"
sample_data(ps05)$Contam <- "0.5"
psa <- merge_phyloseq(ps00, ps01, ps05)
#psa <- prune_taxa(colSums(st.meta>0)>1,psa)
psa <- prune_samples(sample_sums(psa) > 0, psa)
psa <- transform_sample_counts(psa, function(otu) otu/sum(otu))
dbray <- vegdist(as(otu_table(psa), "matrix"), "bray")
ord.meta <- ordinate(psa, distance=dbray, method="MDS")
p.ord.meta <- plot_ordination(psa, ord.meta, color="Kit") +
  facet_grid(Contam~Dilution.number) + 
  theme_bw() + theme(axis.text=element_blank()) +
  scale_color_manual(values=c("CAMBIO"="hotpink", "MP"="#CD0074", "QIAGEN"="#FFCC00")) +
  theme(panel.grid=element_blank(), axis.ticks=element_blank(),
        strip.text=element_text(size=8)) +
#  ggtitle("Whole-genome Shotgun Sequencing") +
  guides(color=FALSE) # Will add back in by hand to final figure
p.ord.meta

ggsave(file.path(path.out, "Salter_Ordination_Shotgun.pdf"), p.ord.meta, width=3.5, height=2, units="in", useDingbats=FALSE)

Final publication figure is made with a little manipulation of the two ordinations in Illustrator.

sessionInfo()

## R version 3.4.3 (2017-11-30)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS Sierra 10.12.6
## 
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] circlize_0.4.4     cowplot_0.9.2      knitr_1.20        
##  [4] dplyr_0.7.5        RColorBrewer_1.1-2 vegan_2.5-2       
##  [7] lattice_0.20-35    permute_0.9-4      gridExtra_2.3     
## [10] dada2_1.8.0        Rcpp_0.12.17       phyloseq_1.22.3   
## [13] reshape2_1.4.3     ggplot2_3.0.0      decontam_1.1.0    
## 
## loaded via a namespace (and not attached):
##  [1] Biobase_2.38.0             jsonlite_1.5              
##  [3] splines_3.4.3              foreach_1.4.4             
##  [5] RcppParallel_4.4.0         assertthat_0.2.0          
##  [7] stats4_3.4.3               latticeExtra_0.6-28       
##  [9] Rsamtools_1.30.0           GenomeInfoDbData_1.0.0    
## [11] yaml_2.1.19                pillar_1.2.3              
## [13] backports_1.1.2            glue_1.2.0                
## [15] digest_0.6.15              GenomicRanges_1.30.3      
## [17] XVector_0.18.0             colorspace_1.3-2          
## [19] htmltools_0.3.6            Matrix_1.2-14             
## [21] plyr_1.8.4                 pkgconfig_2.0.1           
## [23] ShortRead_1.36.1           zlibbioc_1.24.0           
## [25] purrr_0.2.5                scales_0.5.0              
## [27] BiocParallel_1.12.0        tibble_1.4.2              
## [29] mgcv_1.8-23                IRanges_2.12.0            
## [31] withr_2.1.2                SummarizedExperiment_1.8.1
## [33] BiocGenerics_0.24.0        lazyeval_0.2.1            
## [35] survival_2.42-3            magrittr_1.5              
## [37] evaluate_0.10.1            nlme_3.1-137              
## [39] MASS_7.3-50                hwriter_1.3.2             
## [41] tools_3.4.3                data.table_1.11.4         
## [43] GlobalOptions_0.1.0        matrixStats_0.53.1        
## [45] stringr_1.3.1              S4Vectors_0.16.0          
## [47] munsell_0.4.3              cluster_2.0.7-1           
## [49] DelayedArray_0.4.1         bindrcpp_0.2.2            
## [51] Biostrings_2.46.0          ade4_1.7-11               
## [53] compiler_3.4.3             GenomeInfoDb_1.14.0       
## [55] rlang_0.2.1                rhdf5_2.22.0              
## [57] grid_3.4.3                 RCurl_1.95-4.10           
## [59] iterators_1.0.9            biomformat_1.6.0          
## [61] igraph_1.2.1               labeling_0.3              
## [63] bitops_1.0-6               rmarkdown_1.9             
## [65] gtable_0.2.0               codetools_0.2-15          
## [67] multtest_2.34.0            R6_2.2.2                  
## [69] GenomicAlignments_1.14.2   bindr_0.1.1               
## [71] rprojroot_1.3-2            shape_1.4.4               
## [73] ape_5.1                    stringi_1.2.2             
## [75] parallel_3.4.3             tidyselect_0.2.4

Analaysis of Metagenomics data from Salter 2014

Benjamin J Callahan

7/13/2018

Process 16S Amplicon Data

Process Metagenomics Data

Make 4-Panel Sensitivity Figure

Make 16S and Shotgun Ordination Figure(s)