Load packages.

library(decontam); packageVersion("decontam")
## [1] '1.11.1'
library(ggplot2); packageVersion("ggplot2")
## [1] '3.3.5'

Import Curated Data

Note, the imported data has already been curated in the companion script FelineBileCuration.Rmd.

load(file="../RDS/bile.rda")

Inspecting and Curating Microscopy and Culture Results

These bile samples were subjected to a set of clinical microbiological methods to identify Bactibilia based on microscopy, and to identify the presence of several species of bacteria based on culture results (see manuscript for details of the microbiological methods).

The culture and pathology results recorded are in the $Bactibilia, $Organisms, and BacteriaPLUS.BacteriaNEG columns in df. Start by inspecting the $Bactibilia designation over the samples types.

table(SampleType=df$SampleType, Bactibilia=df$Bactibilia, useNA="ifany")
##                     Bactibilia
## SampleType           Bactibilia Nobactibilia <NA>
##   Negative_Isolation          0            0    3
##   Negative_Library            0            0    1
##   Negative_Sampling           0            0   13
##   Positive                    6           50    1

Only six samples in our curated set were identified as having Bactibilia. NA designations were given to all the negative controls, and one real sample.

Making a new logical column that encodes these results.

df$Bactibilia_Positive <- df$Bactibilia == "Bactibilia"
table(Bactibilia=df$Bactibilia, Bactibilia_Positive=df$Bactibilia_Positive, useNA="ifany")
##               Bactibilia_Positive
## Bactibilia     FALSE TRUE <NA>
##   Bactibilia       0    6    0
##   Nobactibilia    50    0    0
##   <NA>             0    0   18

Look correct.

Now inspect the $Organisms designation over the sample types.

table(df$SampleType, df$Organisms, useNA="ifany")
##                     
##                      E.coli E.coli.clostridiumspp. E.coli.enterococcus
##   Negative_Isolation      0                      0                   0
##   Negative_Library        0                      0                   0
##   Negative_Sampling       0                      0                   0
##   Positive                3                      1                   1
##                     
##                      E.coli.Streptococcus Noculturedorganisms
##   Negative_Isolation                    0                   0
##   Negative_Library                      0                   0
##   Negative_Sampling                     0                   0
##   Positive                              1                  47
##                     
##                      Nocultureperformed Peptostreptococcus Staphepidermis <NA>
##   Negative_Isolation                  0                  0              0    3
##   Negative_Library                    0                  0              0    1
##   Negative_Sampling                   0                  0              0   13
##   Positive                            2                  1              1    0

All negative control samples have an NA designation. Of the bile samples, 47 received a "Noculturedoragnisms" designation, while the rest were assigned strings corresponding to the taxa for which they were positive. If culture-positive for multiple taxa (E.coli, clostridiumspp, )

Cleaning this data up by creating individual columns for each organism that was tested for by culture (e.g. $Ecoli), and an additional column $Culture_Positive that is TRUE/FALSE depending on if any organism was successfully cultured from it (and NA if not tested).

str.cult <- c("E.coli", "E.coli.clostridiumspp.", "E.coli.enterococcus", 
              "E.coli.Streptococcus", "Peptostreptococcus", "Staphepidermis")
df$Culture_Positive <- df$Organisms %in% str.cult
df$Culture_Positive[is.na(df$Organisms) | df$Organisms %in% "Nocultureperformed"] <- NA
table(Organisms=df$Organisms, Culture_Positive=df$Culture_Positive, useNA="ifany")
##                         Culture_Positive
## Organisms                FALSE TRUE <NA>
##   E.coli                     0    3    0
##   E.coli.clostridiumspp.     0    1    0
##   E.coli.enterococcus        0    1    0
##   E.coli.Streptococcus       0    1    0
##   Noculturedorganisms       47    0    0
##   Nocultureperformed         0    0    2
##   Peptostreptococcus         0    1    0
##   Staphepidermis             0    1    0
##   <NA>                       0    0   17

Looks correct.

str.ecoli <- "E.coli"
str.clost <- "clostridiumspp."
str.enter <- "enterococcus"
str.strep <- "Streptococcus"
str.pepto <- "Peptostreptococcus"
df$Ecoli <- grepl(str.ecoli, df$Organisms)
df$Clost <- grepl(str.clost, df$Organisms)
df$Enter <- grepl(str.enter, df$Organisms)
df$Strep <- grepl(str.strep, df$Organisms)
df$Pepto <- grepl(str.pepto, df$Organisms)
# Set to NA all those where no culture was performed
df[is.na(df$Culture_Positive), c("Ecoli", "Clost", "Enter", "Strep", "Pepto")] <- NA
df[,c("Organisms", "Ecoli", "Clost", "Enter", "Strep", "Pepto")]
##                      Organisms Ecoli Clost Enter Strep Pepto
## 716-513    Noculturedorganisms FALSE FALSE FALSE FALSE FALSE
## 716-515    Noculturedorganisms FALSE FALSE FALSE FALSE FALSE
## 716-516                 E.coli  TRUE FALSE FALSE FALSE FALSE
## 716-517    Noculturedorganisms FALSE FALSE FALSE FALSE FALSE
## 716-518    Noculturedorganisms FALSE FALSE FALSE FALSE FALSE
## 716-520   E.coli.Streptococcus  TRUE FALSE FALSE  TRUE FALSE
## 716-521                 E.coli  TRUE FALSE FALSE FALSE FALSE
## 716-522    Noculturedorganisms FALSE FALSE FALSE FALSE FALSE
## 718-513    Noculturedorganisms FALSE FALSE FALSE FALSE FALSE
## 718-515                 E.coli  TRUE FALSE FALSE FALSE FALSE
## 718-516    Noculturedorganisms FALSE FALSE FALSE FALSE FALSE
## 718-517    Noculturedorganisms FALSE FALSE FALSE FALSE FALSE
## 718-518    Noculturedorganisms FALSE FALSE FALSE FALSE FALSE
## 718-520    Noculturedorganisms FALSE FALSE FALSE FALSE FALSE
## 718-521    Noculturedorganisms FALSE FALSE FALSE FALSE FALSE
## 718-522    Noculturedorganisms FALSE FALSE FALSE FALSE FALSE
## 719-513    Noculturedorganisms FALSE FALSE FALSE FALSE FALSE
## 719-515     Peptostreptococcus FALSE FALSE FALSE FALSE  TRUE
## 719-517     Nocultureperformed    NA    NA    NA    NA    NA
## 719-518    Noculturedorganisms FALSE FALSE FALSE FALSE FALSE
## 719-520    Noculturedorganisms FALSE FALSE FALSE FALSE FALSE
## 719-521    Noculturedorganisms FALSE FALSE FALSE FALSE FALSE
## 719-522    Noculturedorganisms FALSE FALSE FALSE FALSE FALSE
## 720-513         Staphepidermis FALSE FALSE FALSE FALSE FALSE
## 720-515     Nocultureperformed    NA    NA    NA    NA    NA
## 720-516    Noculturedorganisms FALSE FALSE FALSE FALSE FALSE
## 720-517 E.coli.clostridiumspp.  TRUE  TRUE FALSE FALSE FALSE
## 720-518    Noculturedorganisms FALSE FALSE FALSE FALSE FALSE
## 720-520    Noculturedorganisms FALSE FALSE FALSE FALSE FALSE
## 720-521    Noculturedorganisms FALSE FALSE FALSE FALSE FALSE
## 720-522    Noculturedorganisms FALSE FALSE FALSE FALSE FALSE
## 721-513    Noculturedorganisms FALSE FALSE FALSE FALSE FALSE
## 721-516    Noculturedorganisms FALSE FALSE FALSE FALSE FALSE
## 721-517    Noculturedorganisms FALSE FALSE FALSE FALSE FALSE
## 721-520    E.coli.enterococcus  TRUE FALSE  TRUE FALSE FALSE
## 721-521                   <NA>    NA    NA    NA    NA    NA
## 721-522    Noculturedorganisms FALSE FALSE FALSE FALSE FALSE
## 722-513                   <NA>    NA    NA    NA    NA    NA
## 722-515    Noculturedorganisms FALSE FALSE FALSE FALSE FALSE
## 722-516    Noculturedorganisms FALSE FALSE FALSE FALSE FALSE
## 722-517    Noculturedorganisms FALSE FALSE FALSE FALSE FALSE
## 722-518    Noculturedorganisms FALSE FALSE FALSE FALSE FALSE
## 722-520    Noculturedorganisms FALSE FALSE FALSE FALSE FALSE
## 722-521    Noculturedorganisms FALSE FALSE FALSE FALSE FALSE
## 722-522    Noculturedorganisms FALSE FALSE FALSE FALSE FALSE
## 723-513    Noculturedorganisms FALSE FALSE FALSE FALSE FALSE
## 723-515    Noculturedorganisms FALSE FALSE FALSE FALSE FALSE
## 723-516    Noculturedorganisms FALSE FALSE FALSE FALSE FALSE
## 723-517    Noculturedorganisms FALSE FALSE FALSE FALSE FALSE
## 723-518    Noculturedorganisms FALSE FALSE FALSE FALSE FALSE
## 723-520                   <NA>    NA    NA    NA    NA    NA
## 723-521    Noculturedorganisms FALSE FALSE FALSE FALSE FALSE
## 723-522                   <NA>    NA    NA    NA    NA    NA
## 724-513    Noculturedorganisms FALSE FALSE FALSE FALSE FALSE
## 724-515                   <NA>    NA    NA    NA    NA    NA
## 724-516    Noculturedorganisms FALSE FALSE FALSE FALSE FALSE
## 724-517                   <NA>    NA    NA    NA    NA    NA
## 724-518    Noculturedorganisms FALSE FALSE FALSE FALSE FALSE
## 724-520                   <NA>    NA    NA    NA    NA    NA
## 724-521    Noculturedorganisms FALSE FALSE FALSE FALSE FALSE
## 724-522                   <NA>    NA    NA    NA    NA    NA
## 726-513    Noculturedorganisms FALSE FALSE FALSE FALSE FALSE
## 726-515                   <NA>    NA    NA    NA    NA    NA
## 726-516    Noculturedorganisms FALSE FALSE FALSE FALSE FALSE
## 726-517                   <NA>    NA    NA    NA    NA    NA
## 726-520                   <NA>    NA    NA    NA    NA    NA
## 726-521    Noculturedorganisms FALSE FALSE FALSE FALSE FALSE
## 726-522                   <NA>    NA    NA    NA    NA    NA
## 727-513    Noculturedorganisms FALSE FALSE FALSE FALSE FALSE
## 727-515                   <NA>    NA    NA    NA    NA    NA
## 727-516                   <NA>    NA    NA    NA    NA    NA
## 727-517                   <NA>    NA    NA    NA    NA    NA
## 727-518                   <NA>    NA    NA    NA    NA    NA
## 727-521                   <NA>    NA    NA    NA    NA    NA

Looks correct.

Finally, let’s inspect the relationship between the $Culture_Positive results and the $Bactibilia results.

table(Culture_Positive=df$Culture_Positive, Bactibilia_Positive=df$Bactibilia_Positive, useNA="ifany")
##                 Bactibilia_Positive
## Culture_Positive FALSE TRUE <NA>
##            FALSE    46    0    1
##            TRUE      2    6    0
##            <NA>      2    0   17

Highly concordant, except for two samples in which culture detected a bacteria and Bactibilia was not detected. Inspecting those rows.

i.inspect <- which(df$Culture_Positive %in% TRUE & !df$Bactibilia_Positive)
df[i.inspect, c("SampleType", "Bactibilia_Positive", "Culture_Positive", "Organisms")]
##         SampleType Bactibilia_Positive Culture_Positive          Organisms
## 719-515   Positive               FALSE             TRUE Peptostreptococcus
## 720-513   Positive               FALSE             TRUE     Staphepidermis

Interesting note: These are the only two culture positive samples in which E. coli was not detected by culture, and thus the culture positive designation relied on culture results for other bacteria.

i.cultpos <- which(df$Culture_Positive %in% TRUE)
table(Ecoli=df[i.cultpos, "Ecoli"], Bactibilia=df[i.cultpos, "Bactibilia_Positive"], useNA="ifany")
##        Bactibilia
## Ecoli   FALSE TRUE
##   FALSE     2    0
##   TRUE      0    6

Investigate Library Size versus Culture

Do culture positive samples show any evidence of higher library sizes? (We think that culture might indicate infection, and thus a higher microbial load, which could manifest as higher library sizes, although perhaps only if most samples are nearly sterile and thus the library preparation step fails to make equimolar DNA libraries across samples.)

ggplot(data=df[df$SampleType == "Positive",], 
       aes(x=Culture_Positive, y=LibrarySize)) + 
       geom_boxplot() + geom_jitter(width=0.1)

That is at least suggestive. Let’s discriminate the E. coli positive samples that lined up perfectly with the Bactibilia designation.

ggplot(data=df[df$SampleType == "Positive",], 
       aes(x=Culture_Positive, y=LibrarySize, color=Ecoli)) + 
       geom_boxplot() + geom_jitter(width=0.1)

Small number alert, but the Ecoli positive samples trend even higher in library size. By the “eye test” I suspect it might even reach statistical significance.

t.test(df$LibrarySize[df$Ecoli & df$SampleType %in% "Positive"],
       df$LibrarySize[!df$Ecoli & df$SampleType %in% "Positive"],
       alternative="greater", na.action="na.omit")
## 
##  Welch Two Sample t-test
## 
## data:  df$LibrarySize[df$Ecoli & df$SampleType %in% "Positive"] and df$LibrarySize[!df$Ecoli & df$SampleType %in% "Positive"]
## t = 4.8242, df = 19.977, p-value = 5.168e-05
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
##  20743.55      Inf
## sample estimates:
## mean of x mean of y 
## 107793.67  75506.49
wilcox.test(df$LibrarySize[df$Ecoli & df$SampleType %in% "Positive"],
            df$LibrarySize[!df$Ecoli & df$SampleType %in% "Positive"],
            alternative="greater", na.action="na.omit")
## 
##  Wilcoxon rank sum exact test
## 
## data:  df$LibrarySize[df$Ecoli & df$SampleType %in% "Positive"] and df$LibrarySize[!df$Ecoli & df$SampleType %in% "Positive"]
## W = 249, p-value = 0.001977
## alternative hypothesis: true location shift is greater than 0

Naive two-sample stats agrees.

Save reults and sessionInfo

Saving the current results for the next step of inspecting agreement with the 16S sequencing.

if(!identical(rownames(tax.filt), colnames(st.filt))) stop("Table mismatch: tax.filt, st.filt")
if(!identical(rownames(df), rownames(st))) stop("Mismatched samples in df and st.")
if(!identical(rownames(df), rownames(st.filt))) stop("Mismatched samples in df and st.")
save(st, st.filt, tax.filt, df, file="../RDS/bile_curated.rda")
sessionInfo()
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur 10.16
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] ggplot2_3.3.5   decontam_1.11.1
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_1.0.7       highr_0.9        plyr_1.8.6       bslib_0.3.1     
##  [5] compiler_4.0.3   pillar_1.6.3     jquerylib_0.1.4  tools_4.0.3     
##  [9] digest_0.6.28    jsonlite_1.7.2   evaluate_0.14    lifecycle_1.0.1 
## [13] tibble_3.1.5     gtable_0.3.0     pkgconfig_2.0.3  rlang_0.4.11    
## [17] DBI_1.1.1        yaml_2.2.1       xfun_0.26        fastmap_1.1.0   
## [21] withr_2.4.2      stringr_1.4.0    dplyr_1.0.5      knitr_1.36      
## [25] generics_0.1.0   sass_0.4.0       vctrs_0.3.8      grid_4.0.3      
## [29] tidyselect_1.1.1 glue_1.4.2       R6_2.5.1         fansi_0.5.0     
## [33] rmarkdown_2.11   farver_2.1.0     reshape2_1.4.4   purrr_0.3.4     
## [37] magrittr_2.0.1   scales_1.1.1     htmltools_0.5.2  ellipsis_0.3.2  
## [41] assertthat_0.2.1 colorspace_2.0-2 labeling_0.4.2   utf8_1.2.2      
## [45] stringi_1.7.5    munsell_0.5.0    crayon_1.4.1