Geography-independent mucosal microbiota alterations in primary sclerosing cholangitis persist after liver transplantation

Additional disease control

1 Introduction

PSC vs ALD

In previous analyses, we showed specific mucosal composition in PSC patients which persist after liver transplantation. Here, we added another positive control, to explore if this condition is disease specific, or it can be credited to (liver) disease in general.

Fo this purpose, we conduct the univariate analysis on Czech ALD patients after LTx. By comparing post_LTx PSC and ALD patients, we can check if the proposed PSC-associated bacteria are at least partially overlapping.

If so, this would confirm our previous conclusions that these bacteria are specific for PSC.

Importing libraries and custom functions built for this analysis

source("custom_functions.R")

2 Data Import

Importing ASV, taxa and metadata tables for both Czech ALD samples samples.

path = "../../../../data/analysis_ready_data/ikem/ALD/"
asv_tab_ALD <- as.data.frame(fread(file.path(path,"asv_table_ALD.tsv"),
                                    check.names = FALSE))
taxa_tab_ALD <- as.data.frame(fread(file.path(path,"taxa_table_ALD.tsv"),
                                     check.names = FALSE))
metadata_ALD <- as.data.frame(fread(file.path(path,"metadata_ALD.tsv"),
                                     check.names = FALSE))

Importing previous Czech dataset with PSC patients.

path = "../../../../data/analysis_ready_data/ikem/"
asv_tab_ikem <- as.data.frame(fread(file.path(path,"asv_table_ikem.csv"),
                                    check.names = FALSE))
taxa_tab_ikem <- as.data.frame(fread(file.path(path,"taxa_table_ikem.csv"),
                                     check.names = FALSE))
metadata_ikem <- as.data.frame(fread(file.path(path,"metadata_ikem.csv"),
                                     check.names = FALSE))

Importing proposed PSC-associated taxa.

psc_genus_ti <- read.xlsx("../../../results/merged_sites/main_analysis/Q1/univariate_analysis/psc_effect_terminal_ileum.xlsx")

psc_genus_colon <- read.xlsx("../../../results/merged_sites/main_analysis/Q1/univariate_analysis/psc_effect_colon.xlsx")

3 Terminal ileum

segment="terminal_ileum"

ileum_data <- merging_data(asv_tab_1=asv_tab_ALD,
                           asv_tab_2=asv_tab_ikem,
                           taxa_tab_1=taxa_tab_ALD,
                           taxa_tab_2=taxa_tab_ikem,
                           metadata_1=metadata_ALD,
                           metadata_2=metadata_ikem,
                           segment="TI",Q="QALD")
Removing 630 ASV(s)
Removing 1498 ASV(s)
Merging at ASV level
Finding inconsistencies in taxonomy, trying to keep the ones that have better taxonomy assignment
Removing 223 ASV(s)
ileum_asv_tab <- ileum_data[[1]]
ileum_taxa_tab <- ileum_data[[2]]
ileum_metadata <- ileum_data[[3]]

3.1 Filtering

Rules:

  • sequencing depth > 10000

  • nearZeroVar() with default settings

Library size

read_counts(ileum_asv_tab, line = c(5000,10000))
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.

3.1.1 Sequencing depth

data_filt <- seq_depth_filtering(ileum_asv_tab,
                                 ileum_taxa_tab,
                                 ileum_metadata,
                                 seq_depth_threshold = 10000)
Removing 50 ASV(s)
filt_ileum_asv_tab <- data_filt[[1]]; alpha_ileum_asv_tab <- filt_ileum_asv_tab
filt_ileum_taxa_tab <- data_filt[[2]]; alpha_ileum_taxa_tab <- filt_ileum_taxa_tab
filt_ileum_metadata <- data_filt[[3]]; alpha_ileum_metadata <- filt_ileum_metadata

seq_step <- dim(filt_ileum_asv_tab)[1]

Library size

read_counts(filt_ileum_asv_tab,line = c(5000,10000))
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.

3.1.2 NearZeroVar

data_filt <- nearzerovar_filtering(filt_ileum_asv_tab, 
                                   filt_ileum_taxa_tab,
                                   filt_ileum_metadata)

filt_ileum_asv_tab <- data_filt[[1]]
filt_ileum_taxa_tab <- data_filt[[2]]
nearzero_step <- dim(filt_ileum_asv_tab)[1]

Library size

read_counts(filt_ileum_asv_tab,line = c(5000,10000))
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.

3.1.3 Final Counts

final_counts_filtering(ileum_asv_tab,
                       filt_ileum_asv_tab,
                       filt_ileum_metadata,
                       seq_step, 0, nearzero_step) %>% `colnames<-`("Count")
                            Count
Raw data: ASVs               3982
Raw data: Samples             185
Sequencing depth filt: ASVs  3932
Prevalence filt: ASVs           0
NearZeroVar filt: ASVs        708
Filt data: ASVs               708
Filt data: Samples            181
Filt data: Patients           181
Filt data: Patients.1           0
Filtered samples                4
Matrices                       TI
healthy                         0
non-rPSC                        0
rPSC                            0
pre_ltx                         0
post_ltx                        0
ETOH                            0

3.2 Alpha diversity

path = "../../../results/merged_sites/main_analysis/QALD/alpha_diversity"

Calculation

# Construct MPSE object
alpha_ileum_metadata$Sample <- alpha_ileum_metadata$SampleID
ileum_mpse <- as.MPSE(construct_phyloseq(alpha_ileum_asv_tab,
                                         alpha_ileum_taxa_tab,
                                         alpha_ileum_metadata))

ileum_mpse %<>% mp_rrarefy(raresize = 10000,seed = 123)

# Calculate alpha diversity - rarefied counts
ileum_mpse %<>% mp_cal_alpha(.abundance=RareAbundance, force=TRUE)
alpha_div_plots <- list()

# preparing data frame
alpha_data <- data.frame(SampleID=ileum_mpse$Sample.x,
                         Observe=ileum_mpse$Observe,
                         Shannon=ileum_mpse$Shannon,
                         Simpson=ileum_mpse$Simpson,
                         Pielou=ileum_mpse$Pielou,
                         Group=ileum_mpse$Group,
                         Country=ileum_mpse$Country,
                         Patient=ileum_mpse$Patient)

write.csv(alpha_data,file.path(path,paste0("alpha_indices_",segment,".csv")),
          row.names = FALSE)

3.2.1 Plots

p_boxplot_alpha <- alpha_diversity_countries(alpha_data,
                                             show_legend = FALSE,
                                             size=1,gap=FALSE,jitter=1.5)
Using SampleID, Group, Country as id variables
Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
ℹ Please use the `linewidth` argument instead.
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
Using SampleID, Group, Country as id variables
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
# save the results
alpha_div_plots[[paste(segment,"Country")]] <- p_boxplot_alpha

# see the results
p_boxplot_alpha

3.2.2 Linear Model

path = "../../../results/merged_sites/main_analysis/QALD/alpha_diversity"
alpha_data <- read.csv(file.path(path,paste0("alpha_indices_",segment,".csv")))

Richness

results_model <- pairwise.lm(formula = "Observe ~ Group",
                             factors=alpha_data$Group,
                             data=alpha_data)

# check interaction
if (!is.data.frame(results_model)){
  results_model_observe <- results_model[[1]]
  results_model_observe_emeans <- results_model[[2]]
} else {
  results_model_observe <- results_model
  results_model_observe_emeans <- NA
}

# save the results
pc_observed <- list(); 
pc_observed[[segment]] <- results_model_observe
# see the results
knitr::kable(results_model_observe,digits = 3,
caption = "Raw results of linear model of richness estimation.")
Raw results of linear model of richness estimation.
Estimate Std..Error t.value Pr…t.. p.adj sig
HC vs GroupALD post_ltx -16.785 9.990 -1.680 0.097 0.145
ALD post_ltx vs GroupPSC post_ltx -10.564 8.950 -1.180 0.240 0.240
HC vs GroupPSC post_ltx -27.349 9.275 -2.949 0.004 0.011 *
knitr::kable(results_model_observe_emeans,digits = 3,
caption = "Raw results of independent country analysis")

Table: Raw results of independent country analysis

Shannon

results_model <- pairwise.lm(formula = "Shannon ~ Group",
                             factors=alpha_data$Group,
                             data=alpha_data)

# check interaction
if (!is.data.frame(results_model)){
  results_model_shannon <- results_model[[1]]
  results_model_shannon_emeans <- results_model[[2]]
} else {
  results_model_shannon <- results_model
  results_model_shannon_emeans <- NA
}

# save the results
pc_shannon <- list(); 
pc_shannon[[segment]] <- as.data.frame(results_model_shannon)
# see the results
knitr::kable(results_model_shannon,digits = 3,
caption = "Raw results of linear model of Shannon estimation.")
Raw results of linear model of Shannon estimation.
Estimate Std..Error t.value Pr…t.. p.adj sig
HC vs GroupALD post_ltx -0.026 0.110 -0.236 0.814 0.814
ALD post_ltx vs GroupPSC post_ltx -0.108 0.102 -1.058 0.292 0.438
HC vs GroupPSC post_ltx -0.134 0.110 -1.219 0.225 0.438
knitr::kable(results_model_shannon_emeans,digits = 3,
caption = "Raw results of independent country analysis")

Table: Raw results of independent country analysis

Simpson

results_model <- pairwise.lm(formula = "Simpson ~ Group",
                                     factors=alpha_data$Group,
                                     data=alpha_data)

# check interaction
if (!is.data.frame(results_model)){
  results_model_simpson <- results_model[[1]]
  results_model_simpson_emeans <- results_model[[2]]
} else {
  results_model_simpson <- results_model
  results_model_simpson_emeans <- NA
}


# save the results
pc_simpson <- list(); 
pc_simpson[[segment]] <- as.data.frame(results_model_simpson)
# see the results
knitr::kable(results_model_simpson,digits = 3,
caption = "Raw results of linear model of Simpson estimation.")
Raw results of linear model of Simpson estimation.
Estimate Std..Error t.value Pr…t.. p.adj sig
HC vs GroupALD post_ltx 0.006 0.010 0.587 0.559 0.572
ALD post_ltx vs GroupPSC post_ltx -0.015 0.014 -1.038 0.301 0.572
HC vs GroupPSC post_ltx -0.009 0.016 -0.566 0.572 0.572
knitr::kable(results_model_simpson_emeans,digits = 3,
caption = "Raw results of independent country analysis")

Table: Raw results of independent country analysis

Pielou

results_model <- pairwise.lm(formula = "Pielou ~ Group",
                                     factors=alpha_data$Group,
                                     data=alpha_data)

# check interaction

if (!is.data.frame(results_model)){
  results_model_pielou <- results_model[[1]]
  results_model_pielou_emeans <- results_model[[2]]
} else {
  results_model_pielou <- results_model
  results_model_pielou_emeans <- NA
}

# save the results
pc_pielou <- list(); 
pc_pielou[[segment]] <- as.data.frame(results_model_pielou)
# see the results
knitr::kable(results_model_pielou,digits = 3,
caption = "Raw results of linear model of Pielou estimation.")
Raw results of linear model of Pielou estimation.
Estimate Std..Error t.value Pr…t.. p.adj sig
HC vs GroupALD post_ltx 0.012 0.016 0.726 0.470 0.705
ALD post_ltx vs GroupPSC post_ltx -0.011 0.015 -0.751 0.454 0.705
HC vs GroupPSC post_ltx 0.000 0.017 0.021 0.983 0.983
knitr::kable(results_model_pielou_emeans,digits = 3,
caption = "Raw results of independent country analysis")

Table: Raw results of independent country analysis

3.2.3 Saving results

alpha_list <- list(
  Richness=pc_observed[[segment]],
  Shannon=pc_shannon[[segment]])
                   
write.xlsx(alpha_list, 
           file = file.path(path,paste0("alpha_diversity_results_",segment,".xlsx")))

3.3 Beta diversity

Calculating Aitchison distance (euclidean distance on clr-transformed data), both at ASV and genus level.

3.3.1 Main analysis

Genus level, Aitchison distance

level="genus"
path = "../../../results/merged_sites/main_analysis/QALD/beta_diversity"
pairwise_aitchison_raw <- list()
pca_plots_list <- list()

Aggregation, filtering

# Aggregation
genus_data <- aggregate_taxa(ileum_asv_tab,
                             ileum_taxa_tab,
                             taxonomic_level=level,
                             names=TRUE)
# Filtration
filt_data <- filtering_steps(genus_data[[1]],
                             genus_data[[2]],
                             ileum_metadata,
                             seq_depth_threshold=10000)

filt_ileum_genus_tab <- filt_data[[1]]
filt_ileum_genus_taxa <- filt_data[[2]]
filt_ileum_metadata <- filt_data[[3]]
3.3.1.0.1 PERMANOVA
pairwise_df <- filt_ileum_genus_tab %>% column_to_rownames("SeqID") %>% t()

# main effect
pp_main <- pairwise.adonis(pairwise_df,
                           filt_ileum_metadata$Group,
                           sim.method = "robust.aitchison", p.adjust.m="BH")

# tidy the results
pp_factor <- pp_main

cols <- c("pairs","Df","SumsOfSqs", "F.Model","R2","p.value", "p.adjusted", "sig")
colnames(pp_factor) <- cols

# save raw results
pairwise_aitchison_raw[[paste(level, segment)]] <- pp_factor
# see the results
knitr::kable(pp_factor,digits = 3,caption = "PERMANOVA, GROUP separation")
PERMANOVA, GROUP separation
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
HC vs ALD post_ltx 1 440.848 2.479 0.031 0.001 0.002 **
ALD post_ltx vs PSC post_ltx 1 312.281 1.696 0.012 0.005 0.005 **
HC vs PSC post_ltx 1 439.421 2.454 0.018 0.001 0.002 **
3.3.1.0.2 Plots

PCA

p <- pca_plot_custom(filt_ileum_genus_tab,
                                 filt_ileum_genus_taxa,
                                 filt_ileum_metadata,
                                 show_boxplots = TRUE,
                                 variable = "Group", size=3, show_legend=FALSE)

# save the results
pca_plots_list[[paste(segment,level,"custom")]] <- p

# see the results
p

3.3.1.1 Saving results

write.xlsx(pairwise_aitchison_raw[[paste(level, segment)]], 
           file = file.path(path,
           paste0("beta_diversity_results_", segment,".xlsx")))

3.4 Univariate Analysis

Similar to previous approaches, we will use two DAA tools - linDA and Maaslin. Only the intersection of sets selected by each tool are shown to minimize false positives. As this analysis can only be conducted on Czech cohort, no interaction effect can be shown, therefore only Group effect is investigated.

This analysis is performed at genus level only.

level="genus"

# needed paths
path = "../../../results/merged_sites/main_analysis/QALD/univariate_analysis"
path_maaslin=file.path("../../../intermediate_files/maaslin/QALD",level)
# variables
raw_linda_results_genus <- list();
raw_linda_results_genus[[segment]] <- list()
linda_results_genus <- list(); 
linda_results_genus[[segment]] <- list()

# country and interaction problems
list_country_union <- list()
list_intersections <- list()
list_venns <- list()
uni_statistics <- list()

# workbook for final df
wb <- createWorkbook()

Aggregate taxa

genus_data <- aggregate_taxa(ileum_asv_tab,
                             ileum_taxa_tab,
                             taxonomic_level = level)

ileum_genus_tab <- genus_data[[1]]
ileum_genus_taxa_tab <- genus_data[[2]]

ileum_genus_asv_taxa_tab <- create_asv_taxa_table(ileum_genus_tab,
                                                  ileum_genus_taxa_tab)

3.4.1 PSC vs ALD

We can assume, that the differentially abundant taxa between these two groups are caused by PSC disease and other factors, like liver cirrhosis and overall bad clinical condition.

group <- c("ALD post_ltx","PSC post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])

3.4.1.1 linDA

# prepare the data
linda_data <- binomial_prep(ileum_genus_tab,
                            ileum_genus_taxa_tab,
                            ileum_metadata,
                            group, usage="linDA")
Removing 33 ASV(s)
filt_ileum_uni_data <- linda_data[[1]]
filt_ileum_uni_taxa <- linda_data[[2]]
filt_ileum_uni_metadata <- linda_data[[3]]

# fit the model
linda.obj <- linda(filt_ileum_uni_data, 
                   filt_ileum_uni_metadata, 
                   formula = '~ Group')
0  features are filtered!
The filtered data has  144  samples and  200  features will be tested!
Pseudo-count approach is used.
Fit linear models ...
Completed.
linda.output <- linda.obj$output
linda.output <- linda_renaming(linda.output, group)

# save the results
group1 <- paste0(group[1], " vs ","Group",group[2])

for (grp in c(group1)){
  raw_linda_results_genus[[segment]][[grp]] <- 
    rawlinda.df(linda.output,
                grp,
                filt_ileum_uni_data,
                filt_ileum_uni_taxa)
  
  linda_results_genus[[segment]][[grp]] <- 
    linda.df(linda.output,
             grp,
             filt_ileum_uni_data,
             filt_ileum_uni_taxa)
}
# volcano plots
volcano <- volcano_plot_linda(linda.output, 
                                group1, 
                                taxa_table = filt_ileum_uni_taxa) + 
            ggtitle(comparison_title)

volcano

3.4.1.2 MaAsLin2

volcano <- volcano_plot_maaslin(fit_data,filt_ileum_uni_taxa) + 
            ggtitle(comparison_title)


volcano

3.4.1.3 Group - Intersection

intersection_results <- group_intersection(group, list_intersections, list_venns,
                                           linda.output, fit_data,
                                           raw_linda_results_genus, 
                                           segment = segment,
                                           level=level)

list_intersections <- intersection_results[[1]]
list_venns <- intersection_results[[2]]
venn <- intersection_results[[3]]

# show the results
venn

3.4.1.4 Basic statistics

uni_df <-  merge(basic_univariate_statistics(linda_data,group),
                 raw_linda_results_genus[[segment]][[group1]],
                 by="SeqID",all=TRUE)
[1] "ALD post_ltx"
[1] "PSC post_ltx"
[1] "ALD post_ltx"
[1] "PSC post_ltx"
uni_df[["final_sig"]] <- uni_df$SeqID %in% list_intersections[[paste(segment,level,comparison_name)]][["SeqID"]]
uni_statistics[[segment]][[paste(level,comparison_name)]] <- uni_df

# for comparison
new_name <- comparison_name
addWorksheet(wb, sheetName = new_name)
writeData(wb, sheet = new_name, uni_df, rowNames=FALSE)

3.4.1.5 Comparison with PSC-associated taxa

psc_ald_sig_ti <- uni_df %>%
  dplyr::filter(final_sig==TRUE) %>%
  dplyr::select(SeqID,Taxonomy,log2FoldChange)

psc_genus_ti <- psc_genus_ti
list_core <- list()
list_core[["PSC vs ALD"]] <- psc_ald_sig_ti$Taxonomy
list_core[["PSC-associated taxa"]] <- psc_genus_ti$Taxonomy
  
# venn diagram
venn <- ggvenn(list_core, fill_color = c("blue", "red")) + 
    ggtitle("Intersection between PSC-associated taxa and (PSC vs ALD)") + 
    guides(fill = "none")

venn

intersection_df <- intersect(list_core$`PSC vs ALD`,list_core$`PSC-associated taxa`)

intersection_df
character(0)

3.4.2 ALD vs HC

We can assume, that the differentially abundant taxa between these two groups are caused by PSC disease and other factors, like liver cirrhosis and overall bad clinical condition.

group <- c("HC","ALD post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])

3.4.2.1 linDA

# prepare the data
linda_data <- binomial_prep(ileum_genus_tab,
                            ileum_genus_taxa_tab,
                            ileum_metadata,
                            group, usage="linDA")
Removing 82 ASV(s)
filt_ileum_uni_data <- linda_data[[1]]
filt_ileum_uni_taxa <- linda_data[[2]]
filt_ileum_uni_metadata <- linda_data[[3]]

# fit the model
linda.obj <- linda(filt_ileum_uni_data, 
                   filt_ileum_uni_metadata, 
                   formula = '~ Group')
0  features are filtered!
The filtered data has  79  samples and  216  features will be tested!
Pseudo-count approach is used.
Fit linear models ...
Completed.
linda.output <- linda.obj$output
linda.output <- linda_renaming(linda.output, group)

# save the results
group1 <- paste0(group[1], " vs ","Group",group[2])

for (grp in c(group1)){
  raw_linda_results_genus[[segment]][[grp]] <- 
    rawlinda.df(linda.output,
                grp,
                filt_ileum_uni_data,
                filt_ileum_uni_taxa)
  
  linda_results_genus[[segment]][[grp]] <- 
    linda.df(linda.output,
             grp,
             filt_ileum_uni_data,
             filt_ileum_uni_taxa)
}
# volcano plots
volcano <- volcano_plot_linda(linda.output, 
                                group1, 
                                taxa_table = filt_ileum_uni_taxa) + 
            ggtitle(comparison_title)

volcano

3.4.2.2 MaAsLin2

volcano <- volcano_plot_maaslin(fit_data,filt_ileum_uni_taxa) + 
            ggtitle(comparison_title)


volcano

3.4.2.3 Group - Intersection

intersection_results <- group_intersection(group, list_intersections, list_venns,
                                           linda.output, fit_data,
                                           raw_linda_results_genus, 
                                           segment = segment,
                                           level=level)

list_intersections <- intersection_results[[1]]
list_venns <- intersection_results[[2]]
venn <- intersection_results[[3]]

# show the results
venn

3.4.2.4 Basic statistics

uni_df <-  merge(basic_univariate_statistics(linda_data,group),
                 raw_linda_results_genus[[segment]][[group1]],
                 by="SeqID",all=TRUE)
[1] "HC"
[1] "ALD post_ltx"
[1] "HC"
[1] "ALD post_ltx"
uni_df[["final_sig"]] <- uni_df$SeqID %in% list_intersections[[paste(segment,level,comparison_name)]][["SeqID"]]
uni_statistics[[segment]][[paste(level,comparison_name)]] <- uni_df

# for comparison
new_name <- comparison_name
addWorksheet(wb, sheetName = new_name)
writeData(wb, sheet = new_name, uni_df, rowNames=FALSE)

3.4.2.5 Comparison with PSC-associated taxa

hc_ald_sig_ti <- uni_df %>%
  dplyr::filter(final_sig==TRUE) %>%
  dplyr::select(SeqID,Taxonomy,log2FoldChange)

psc_genus_ti <- psc_genus_ti
list_core <- list()
list_core[["ALD vs HC"]] <- hc_ald_sig_ti$Taxonomy
list_core[["PSC-associated taxa"]] <- psc_genus_ti$Taxonomy
  
# venn diagram
venn <- ggvenn(list_core, fill_color = c("blue", "red")) + 
    ggtitle("Intersection between PSC-associated taxa and (ALD vs HC)") + 
    guides(fill = "none")

venn

From this Venn diagram, we are interested in EXLUSIVE RIGH JOIN

A <- list_core$`ALD vs HC`
B <- list_core$`PSC-associated taxa`
remaining_psc <- B[!(B %in% A)]

psc_genus_ti %>%
  dplyr::filter(Taxonomy %in% remaining_psc)
                         SeqID
1                Enterorhabdus
2                  Barnesiella
3                Butyricimonas
4              Parabacteroides
5                   Holdemania
6             Fusicatenibacter
7            Lachnoclostridium
8 Lachnospiraceae_FCS020_group
9                 Enterococcus
                                                                                                      Taxonomy
1    k__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Eggerthellaceae;g__Enterorhabdus
2                k__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Barnesiellaceae;g__Barnesiella
3               k__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Marinifilaceae;g__Butyricimonas
4             k__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Tannerellaceae;g__Parabacteroides
5              k__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Holdemania
6             k__Bacteria;p__Firmicutes;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Fusicatenibacter
7            k__Bacteria;p__Firmicutes;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Lachnoclostridium
8 k__Bacteria;p__Firmicutes;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Lachnospiraceae_FCS020_group
9                   k__Bacteria;p__Firmicutes;c__Bacilli;o__Lactobacillales;f__Enterococcaceae;g__Enterococcus
  log2FoldChange      p_value         padj
1      -1.050549 1.087023e-02 4.545734e-02
2      -3.177161 1.263665e-03 7.750481e-03
3      -2.900425 6.571752e-05 9.301556e-04
4      -3.418185 1.366753e-05 2.794251e-04
5      -1.213660 1.082333e-02 4.545734e-02
6      -2.864295 5.224510e-04 4.336862e-03
7      -1.781011 1.555333e-03 9.231654e-03
8      -2.231945 8.971276e-04 6.113758e-03
9       4.892498 4.554602e-10 8.380467e-08
uni_df %>%
  dplyr::filter(Taxonomy %in% remaining_psc)
                         SeqID  Q1_clr HC Q1_clr ALD post_ltx     Q1_ra HC
1                  Barnesiella  2.2306841           -1.154492 0.0008978388
2                Butyricimonas  0.4732196           -1.336010 0.0001128859
3                 Enterococcus -1.8330261           -1.727812 0.0000000000
4                Enterorhabdus -1.5823853           -1.727812 0.0000000000
5             Fusicatenibacter  2.4909696           -0.227097 0.0015781761
6                   Holdemania -1.5163636           -1.717373 0.0000000000
7            Lachnoclostridium  4.5353883            4.487578 0.0166991131
8 Lachnospiraceae_FCS020_group  0.3961494           -1.526367 0.0001974724
9              Parabacteroides  3.4409794            0.750685 0.0033754974
  Q1_ra ALD post_ltx MEDIAN_clr HC MEDIAN_clr ALD post_ltx MEDIAN_clr_ALL
1       0.0000000000      3.390671               2.2487867      3.0290037
2       0.0000000000      1.642021               0.3369299      1.1642191
3       0.0000000000     -1.582385              -1.3920182     -1.5512393
4       0.0000000000     -1.233676              -1.3572043     -1.3225254
5       0.0001250234      4.164524               3.3132950      3.8791955
6       0.0000000000     -0.860118              -1.3232237     -1.2138890
7       0.0124473517      5.396674               5.1523001      5.1893387
8       0.0000000000      1.764065              -0.9196969      0.8750241
9       0.0002511326      3.824022               3.5017494      3.7914827
  MEDIAN_ra HC MEDIAN_ra ALD post_ltx MEDIAN_ra_ALL  Q3_clr HC
1 0.0046355656           0.0016150245  0.0024345709  4.6356379
2 0.0007128310           0.0002592059  0.0004303749  2.7339753
3 0.0000000000           0.0000000000  0.0000000000 -1.2896879
4 0.0000000000           0.0000000000  0.0000000000  0.8031675
5 0.0082828283           0.0033676475  0.0068469405  4.7182217
6 0.0000000000           0.0000000000  0.0000000000  0.9174882
7 0.0297397770           0.0211809483  0.0230078563  5.7811458
8 0.0008080808           0.0000000000  0.0002824175  2.3894697
9 0.0073748903           0.0045099334  0.0067575649  4.5215073
  Q3_clr ALD post_ltx     Q3_ra HC Q3_ra ALD post_ltx MEAN_clr HC
1           4.0842760 0.0139621471       8.852401e-03   2.9252004
2           1.8752309 0.0021949078       6.644303e-04   1.3627535
3           0.2506441 0.0000000000       5.779679e-05  -1.3948854
4          -1.0928405 0.0003922887       0.000000e+00  -0.4049738
5           4.4840250 0.0139441116       1.207321e-02   3.7152372
6          -0.7565098 0.0002962085       0.000000e+00  -0.2465916
7           5.7684635 0.0450843327       3.322824e-02   5.2855780
8           1.8150207 0.0016318205       7.561644e-04   1.2077527
9           4.5894257 0.0130519300       1.339695e-02   3.4087184
  MEAN_clr ALD post_ltx   MEAN_ra HC MEAN_ra ALD post_ltx Cliffs_delta_clr
1             1.8160679 0.0093336646         5.606783e-03      -0.26254826
2             0.2876302 0.0014975868         7.910501e-04      -0.34362934
3            -0.6655413 0.0003453243         3.431538e-04       0.21364221
4            -1.1217975 0.0003712743         1.037741e-04      -0.23809524
5             2.6386800 0.0116677646         1.006397e-02      -0.18275418
6            -0.9965843 0.0002566601         8.634005e-05      -0.27927928
7             5.0817617 0.0362248096         3.235612e-02      -0.07335907
8             0.1262936 0.0011968063         8.176818e-04      -0.33075933
9             2.7475073 0.0105718892         1.109219e-02      -0.11068211
  Cliffs_delta_ra    LFC_clr       LFC_ra PREVALENCE_ HC
1      -0.3140283 -1.1091326 -0.735270444     0.83783784
2      -0.4646075 -1.0751233 -0.920798696     0.75675676
3      -0.4916345  0.7293441 -0.009096507     0.05405405
4      -0.7451737 -0.7168237 -1.839038396     0.37837838
5      -0.1969112 -1.0765572 -0.213328037     0.97297297
6      -0.6628057 -0.7499927 -1.571757406     0.48648649
7      -0.1724582 -0.2038164 -0.162939515     1.00000000
8      -0.4478764 -1.0814591 -0.549578180     0.75675676
9      -0.1557272 -0.6612111  0.069310834     0.89189189
  PREVALENCE_ ALD post_ltx
1                0.6666667
2                0.5238095
3                0.2619048
4                0.1666667
5                0.7380952
6                0.2380952
7                0.9761905
8                0.4523810
9                0.7857143
                                                                                                      Taxonomy
1                k__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Barnesiellaceae;g__Barnesiella
2               k__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Marinifilaceae;g__Butyricimonas
3                   k__Bacteria;p__Firmicutes;c__Bacilli;o__Lactobacillales;f__Enterococcaceae;g__Enterococcus
4    k__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Eggerthellaceae;g__Enterorhabdus
5             k__Bacteria;p__Firmicutes;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Fusicatenibacter
6              k__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Holdemania
7            k__Bacteria;p__Firmicutes;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Lachnoclostridium
8 k__Bacteria;p__Firmicutes;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Lachnospiraceae_FCS020_group
9             k__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Tannerellaceae;g__Parabacteroides
  log2FoldChange     p_value        padj final_sig
1     -1.9677889 0.011973662 0.043835781     FALSE
2     -1.9248057 0.001377080 0.010256872     FALSE
3      0.7455791 0.092817775 0.213283399     FALSE
4     -1.3801647 0.002025050 0.011821911     FALSE
5     -1.9111605 0.010528585 0.041705672     FALSE
6     -1.4112402 0.001166764 0.009693115     FALSE
7     -0.6520372 0.101176093 0.223000367     FALSE
8     -1.9230978 0.001758570 0.011530669     FALSE
9     -1.3370097 0.079717532 0.191645059     FALSE

3.4.2.6 Visualization

Heatmap visualizing the linDA’s logFoldChange for taxa with p < 0.1.

list_heatmap <- list_intersections[grep(paste(segment,level),
                                  names(list_intersections),value=TRUE)]

p_heatmap_linda <- heatmap_linda(list_heatmap,ileum_taxa_tab)
p_heatmap_linda

Dot heatmap

dotheatmap_linda <- dot_heatmap_linda(list_heatmap,
                                      uni_statistics$terminal_ileum[grepl(level,names(uni_statistics$terminal_ileum))],
                                      ileum_taxa_tab) + xlab("") + ylab("")
min_clr -1.551239 
max_clr 6.219872 
min_log -2.921954 
max_log 2.390885 
dotheatmap_linda

Horizontal bar plot

p_prevalence <- horizontal_barplot(wb,taxa=levels(dotheatmap_linda$data$SeqID))
Using SeqID as id variables
p_prevalence_final <- ggarrange(p_prevalence,
                                ggplot() + theme_minimal(),
                                nrow = 2,heights = c(1,0.15))
Warning: Removed 1 row containing missing values or values outside the scale range
(`geom_bar()`).
p <- ggarrange(dotheatmap_linda + theme(legend.position = "none"),
               p_prevalence_final,
               ncol=2,widths = c(0.8,0.3))
Warning: Removed 17 rows containing missing values or values outside the scale range
(`geom_point()`).
Warning: Removed 17 rows containing missing values or values outside the scale range
(`geom_text()`).
p

dot_heatmap_ileum <- p

4 Data Analysis - Colon

segment="colon"

colon_data <- merging_data(asv_tab_1=asv_tab_ALD,
                           asv_tab_2=asv_tab_ikem,
                           taxa_tab_1=taxa_tab_ALD,
                           taxa_tab_2=taxa_tab_ikem,
                           metadata_1=metadata_ALD,
                           metadata_2=metadata_ikem,
                           segment=segment,Q="QALD")
Removing 158 ASV(s)
Removing 739 ASV(s)
Merging at ASV level
Finding inconsistencies in taxonomy, trying to keep the ones that have better taxonomy assignment
Removing 207 ASV(s)
colon_asv_tab <- colon_data[[1]]
colon_taxa_tab <- colon_data[[2]]
colon_metadata <- colon_data[[3]]

4.1 Filtering

Rules: - sequencing depth > 10000 - nearZeroVar() with default settings

Library size

read_counts(colon_asv_tab, line = c(5000,10000))
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.

4.1.1 Sequencing depth

data_filt <- seq_depth_filtering(colon_asv_tab,
                                 colon_taxa_tab,
                                 colon_metadata,
                                 seq_depth_threshold = 10000)
Removing 4 ASV(s)
filt_colon_asv_tab <- data_filt[[1]]; alpha_colon_asv_tab <- filt_colon_asv_tab
filt_colon_taxa_tab <- data_filt[[2]]; alpha_colon_taxa_tab <- filt_colon_taxa_tab
filt_colon_metadata <- data_filt[[3]]; alpha_colon_metadata <- filt_colon_metadata

seq_step <- dim(filt_colon_asv_tab)[1]

Library size

read_counts(filt_colon_asv_tab,line = c(10000))
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.

4.1.2 NearZeroVar

data_filt <- nearzerovar_filtering(filt_colon_asv_tab,
                                   filt_colon_taxa_tab,
                                   filt_colon_metadata)

filt_colon_asv_tab <- data_filt[[1]]
filt_colon_taxa_tab <- data_filt[[2]]
nearzero_step <- dim(filt_colon_asv_tab)[1]

Library size

read_counts(filt_colon_asv_tab,line = c(5000,10000))
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.

Check zero depth

data_filt <- check_zero_depth(filt_colon_asv_tab, 
                              filt_colon_taxa_tab, 
                              filt_colon_metadata)

filt_colon_asv_tab <- data_filt[[1]]; 
filt_colon_taxa_tab <- data_filt[[2]]; 
filt_colon_metadata <- data_filt[[3]]; 

Library size

read_counts(filt_colon_asv_tab,line = c(5000,10000))
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.

4.1.3 Final Counts

final_counts_filtering(colon_asv_tab,
                       filt_colon_asv_tab,
                       filt_colon_metadata,
                       seq_step, 0, nearzero_step)
                                         V1
Raw data: ASVs                         4893
Raw data: Samples                       400
Sequencing depth filt: ASVs            4889
Prevalence filt: ASVs                     0
NearZeroVar filt: ASVs                  514
Filt data: ASVs                         514
Filt data: Samples                      395
Filt data: Patients                     210
Filt data: Patients.1                     0
Filtered samples                          5
Matrices                    Cecum;Rectum;CD
healthy                                   0
non-rPSC                                  0
rPSC                                      0
pre_ltx                                   0
post_ltx                                  0
ETOH                                      0

4.2 Alpha diversity

Calculating Richness, Shannon, Simpson, Pielou indexes on raw (unfiltered) rarefied data. Samples with sequencing depth < 10000 were excluded. Testing by linear mixed-effect model.

path = "../../../results/merged_sites/main_analysis/QALD/alpha_diversity"

Calculation

# Construct MPSE object
alpha_colon_metadata$Sample <- alpha_colon_metadata$SampleID
colon_mpse <- as.MPSE(construct_phyloseq(alpha_colon_asv_tab,
                                         alpha_colon_taxa_tab,
                                         alpha_colon_metadata))

colon_mpse %<>% mp_rrarefy(raresize = 10000,seed = 123)

# Calculate alpha diversity - rarefied counts
colon_mpse %<>% mp_cal_alpha(.abundance=RareAbundance, force=TRUE)
alpha_data <- data.frame(SampleID=colon_mpse$Sample.x,
                         Observe=colon_mpse$Observe,
                         Shannon=colon_mpse$Shannon,
                         Simpson=colon_mpse$Simpson,
                         Pielou=colon_mpse$Pielou,
                         Group=colon_mpse$Group,
                         Country=colon_mpse$Country,
                         Patient=colon_mpse$Patient)

write.csv(alpha_data,file.path(path,paste0("alpha_indices_",segment,".csv")),
          row.names = FALSE)

4.2.1 Plots

p_boxplot_alpha <- alpha_diversity_countries(alpha_data,
                                             show_legend = FALSE,
                                             size=1,gap=TRUE,jitter = 1.5)
Using SampleID, Group, Country as id variables
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
Using SampleID, Group, Country as id variables

Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
# save the results
alpha_div_plots[[paste(segment,"Country")]] <- p_boxplot_alpha

# see the results
p_boxplot_alpha

4.2.2 Linear Model

path = "../../../results/merged_sites/main_analysis/QALD/alpha_diversity"
alpha_data <- read.csv(file.path(path,paste0("alpha_indices_",segment,".csv")))

Richness

results_model <- pairwise.lmer(
  formula = "Observe ~ Group + (1|Patient)",
  factors=alpha_data$Group,
  data=alpha_data)

# check interaction
if (!is.data.frame(results_model)){
  results_model_observe <- results_model[[1]]
  results_model_observe_detailed <- results_model[[2]]
} else {
  results_model_observe <- results_model
  results_model_observe_detailed <- NA
}

# save the results
pc_observed[[segment]] <- results_model_observe
# see the results
knitr::kable(results_model_observe,digits = 3,
caption = "Raw results of linear model of richness estimation.")
Raw results of linear model of richness estimation.
Estimate Std..Error df t.value Pr…t.. p.adj sig
HC vs GroupALD post_ltx -25.783 8.899 101.256 -2.897 0.005 0.007 **
ALD post_ltx vs GroupPSC post_ltx -1.333 8.824 154.989 -0.151 0.880 0.880
HC vs GroupPSC post_ltx -27.170 8.556 161.321 -3.175 0.002 0.005 **
knitr::kable(results_model_observe_detailed,digits = 3,
caption = "Raw results of independent country analysis")

Table: Raw results of independent country analysis

Shannon

results_model <- pairwise.lmer(
  formula = "Shannon ~ Group  + (1|Patient)",
  factors=alpha_data$Group,
  data=alpha_data)

# check interaction
if (!is.data.frame(results_model)){
  results_model_shannon <- results_model[[1]]
  results_model_shannon_detailed <- results_model[[2]]
} else {
  results_model_shannon <- results_model
  results_model_shannon_detailed <- NA
}

# save the results
pc_shannon[[segment]] <- as.data.frame(results_model_shannon)
# see the results
knitr::kable(results_model_shannon,digits = 3,
caption = "Raw results of linear model of Shannon estimation.")
Raw results of linear model of Shannon estimation.
Estimate Std..Error df t.value Pr…t.. p.adj sig
HC vs GroupALD post_ltx -0.187 0.083 101.292 -2.251 0.027 0.047 *
ALD post_ltx vs GroupPSC post_ltx -0.028 0.108 152.383 -0.257 0.797 0.797
HC vs GroupPSC post_ltx -0.218 0.100 158.220 -2.172 0.031 0.047 *
knitr::kable(results_model_shannon_detailed,digits = 3,
caption = "Raw results of independent country analysis")

Table: Raw results of independent country analysis

Simpson

results_model <- pairwise.lmer(
  formula = "Simpson ~ Group + (1|Patient)",
  factors=alpha_data$Group,
  data=alpha_data)

# check interaction
if (!is.data.frame(results_model)){
  results_model_simpson <- results_model[[1]]
  results_model_simpson_detailed <- results_model[[2]]
} else {
  results_model_simpson <- results_model
  results_model_simpson_detailed <- NA
}

# save the results
pc_simpson[[segment]] <- as.data.frame(results_model_simpson)
# see the results
knitr::kable(results_model_simpson,digits = 3,
caption = "Raw results of linear model of Simpson estimation.")
Raw results of linear model of Simpson estimation.
Estimate Std..Error df t.value Pr…t.. p.adj sig
HC vs GroupALD post_ltx -0.007 0.006 100.855 -1.206 0.231 0.284
ALD post_ltx vs GroupPSC post_ltx -0.019 0.018 152.512 -1.074 0.284 0.284
HC vs GroupPSC post_ltx -0.027 0.017 157.717 -1.607 0.110 0.284
knitr::kable(results_model_simpson_detailed,digits = 3,
caption = "Raw results of independent country analysis")

Table: Raw results of independent country analysis

Pielou

results_model <- pairwise.lmer(
  formula = "Pielou ~ Group + (1|Patient)",
  factors=alpha_data$Group,
  data=alpha_data)

# check interaction
if (!is.data.frame(results_model)){
  results_model_pielou <- results_model[[1]]
  results_model_pielou_detailed <- results_model[[2]]
} else {
  results_model_pielou <- results_model
  results_model_pielou_detailed <- NA
}

# save the results
pc_pielou[[segment]] <- as.data.frame(results_model_pielou)
# see the results
knitr::kable(results_model_pielou,digits = 3,
caption = "Raw results of linear model of Pielou estimation.")
Raw results of linear model of Pielou estimation.
Estimate Std..Error df t.value Pr…t.. p.adj sig
HC vs GroupALD post_ltx -0.013 0.011 101.108 -1.182 0.240 0.401
ALD post_ltx vs GroupPSC post_ltx -0.004 0.017 151.723 -0.244 0.807 0.807
HC vs GroupPSC post_ltx -0.018 0.016 157.456 -1.113 0.267 0.401
knitr::kable(results_model_pielou_detailed,digits = 3,
caption = "Raw results of independent country analysis")

Table: Raw results of independent country analysis

4.2.3 Saving results

alpha_list <- list(
  Richness=pc_observed[[segment]] %>% rownames_to_column("Comparison"),
  Shannon=pc_shannon[[segment]] %>% rownames_to_column("Comparison"),
  Simpson=pc_simpson[[segment]] %>% rownames_to_column("Comparison"),
  Pielou=pc_pielou[[segment]] %>% rownames_to_column("Comparison"))
                   
write.xlsx(alpha_list, 
           file = file.path(path,paste0("alpha_diversity_results_",segment,".xlsx")))

4.3 Beta diversity

Calculating Aitchison distance (euclidean distance on clr-transformed data) at genus level (main analysis). In supplements, there are also bray-curtis and Jaccard distance at ASV and genus levels. Testing by PERMANOVA.

4.3.1 Main analysis

Genus level, Aitchison distance

level="genus"
path = "../../../results/merged_sites/main_analysis/QALD/beta_diversity"

Aggregation, filtering

genus_data <- aggregate_taxa(colon_asv_tab,
                             colon_taxa_tab,
                             taxonomic_level=level,
                             names=TRUE)

filt_data <- filtering_steps(genus_data[[1]],
                             genus_data[[2]],
                             colon_metadata,
                            seq_depth_threshold=10000)

filt_colon_genus_tab <- filt_data[[1]]
filt_colon_genus_taxa <- filt_data[[2]]
filt_colon_genus_metadata <- filt_data[[3]]

4.3.2 PERMANOVA

pairwise_df <- filt_colon_genus_tab %>% column_to_rownames("SeqID") %>% t()

# main effect
pp_main <- pairwise.adonis(pairwise_df,filt_colon_genus_metadata$Group,
                           patients = filt_colon_genus_metadata$Patient,
                           sim.method = "robust.aitchison", p.adjust.m="BH")


# tidy the results
pp_factor <- pp_main

cols <- c("pairs","Df","SumsOfSqs", "F.Model","R2","p.value", "p.adjusted", "sig")
colnames(pp_factor) <- cols

# save raw results
pairwise_aitchison_raw[[paste(level, segment)]] <-rbind(pp_factor
                                                        )
# see the results
knitr::kable(pp_factor,digits = 3,caption = "PERMANOVA, GROUP separation")
PERMANOVA, GROUP separation
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
HC vs ALD post_ltx 1 912.451 5.183 0.027 0.001 0.002 **
ALD post_ltx vs PSC post_ltx 1 477.932 2.675 0.009 0.040 0.040 *
HC vs PSC post_ltx 1 1115.245 6.365 0.021 0.001 0.002 **
4.3.2.0.1 Plots
p <- pca_plot_custom(filt_colon_genus_tab,
                                 filt_colon_genus_taxa,
                                 filt_colon_genus_metadata,
                                 show_boxplots = TRUE,
                                 variable = "Group", size=2, 
                                 show_legend= FALSE)

# save the results
pca_plots_list[[paste(segment,level,"custom")]] <- p

# see the results
p

4.3.2.1 Saving results

write.xlsx(pairwise_aitchison_raw[[paste(level, segment)]], 
           file = file.path(path,
           paste0("beta_diversity_results_", segment,".xlsx")))

4.4 Univariate Analysis

level="genus"
# needed paths
path = "../../../results/merged_sites/main_analysis/QALD/univariate_analysis"
path_maaslin=file.path("../../../intermediate_files/maaslin/QALD",level)

# variables
raw_linda_results_genus[[segment]] <- list()
linda_results_genus[[segment]] <- list()

# country and interaction problems
list_country_union <- list()
list_venns <- list()

# workbook for final df
wb <- createWorkbook()
level="genus"

Aggregate taxa

genus_data <- aggregate_taxa(colon_asv_tab,
                             colon_taxa_tab,
                             taxonomic_level = level)

colon_genus_tab <- genus_data[[1]]
colon_genus_taxa_tab <- genus_data[[2]]

colon_genus_asv_taxa_tab <- create_asv_taxa_table(colon_genus_tab,
                                                  colon_genus_taxa_tab)
4.4.0.0.1 PSC vs ALD
4.4.0.0.1.1 linDA
group <- c("ALD post_ltx","PSC post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
# prepare the data
linda_data <- binomial_prep(colon_genus_tab,
                            colon_genus_taxa_tab,
                            colon_metadata,
                            group, usage="linDA")
Removing 12 ASV(s)
filt_colon_uni_data <- linda_data[[1]]
filt_colon_uni_taxa <- linda_data[[2]]
filt_colon_uni_metadata <- linda_data[[3]]

# fit the model
linda.obj <- linda(filt_colon_uni_data,
                   filt_colon_uni_metadata,
                   formula = '~ Group + (1|Patient)')
0  features are filtered!
The filtered data has  301  samples and  170  features will be tested!
Imputation approach is used.
Fit linear mixed effects models ...
Completed.
linda.output <- linda.obj$output
linda.output <- linda_renaming(linda.output, group)

# save the results
group1 <- paste0(group[1], " vs ","Group",group[2])

for (grp in c(group1)){
  raw_linda_results_genus[[segment]][[grp]] <- 
    rawlinda.df(linda.output,
                grp,
                filt_colon_uni_data,
                filt_colon_uni_taxa)
  
  linda_results_genus[[segment]][[grp]] <- 
    linda.df(linda.output,
             grp,
             filt_colon_uni_data,
             filt_colon_uni_taxa)
}
# volcano plot
volcano <- volcano_plot_linda(linda.output, group1, 
                                taxa_table = filt_colon_uni_taxa) + 
            ggtitle(paste(group,collapse=" vs "))


# see the plot
volcano

4.4.0.0.1.2 MaAsLin2
volcano <- volcano_plot_maaslin(fit_data,filt_colon_uni_taxa) + 
            ggtitle(paste(group[1], "vs", group[2]))

volcano

4.4.0.0.1.3 Group - Intersection
intersection_results <- group_intersection(group, list_intersections, list_venns,
                                           linda.output, fit_data,
                                           raw_linda_results_genus, 
                                           segment = segment,
                                           level=level)

list_intersections <- intersection_results[[1]]
list_venns <- intersection_results[[2]]
venn <- intersection_results[[3]]

# show the results
venn

4.4.0.0.1.4 Basic statistics
uni_df <-  merge(basic_univariate_statistics(linda_data,group),
                 raw_linda_results_genus[[segment]][[group1]],
                 by="SeqID",all=TRUE)
[1] "ALD post_ltx"
[1] "PSC post_ltx"
[1] "ALD post_ltx"
[1] "PSC post_ltx"
uni_df[["final_sig"]] <- uni_df$SeqID %in% list_intersections[[paste(segment,level,comparison_name)]][["SeqID"]]
uni_statistics[[segment]][[paste(level,comparison_name)]] <- uni_df

# for comparison
new_name <- comparison_name
addWorksheet(wb, sheetName = new_name)
writeData(wb, sheet = new_name, uni_df, rowNames=FALSE)
4.4.0.0.1.5 Comparison with PSC-associated taxa
psc_ald_sig_colon <- uni_df %>%
  dplyr::filter(final_sig==TRUE) %>%
  dplyr::select(SeqID,Taxonomy,log2FoldChange)

psc_genus_colon <- psc_genus_colon
list_core <- list()
list_core[["PSC vs ALD"]] <- psc_ald_sig_colon$Taxonomy
list_core[["PSC-associated taxa"]] <- psc_genus_colon$Taxonomy

# venn diagram
venn <- ggvenn(list_core, fill_color = c("blue", "red")) + 
    ggtitle("Intersection between PSC-associated taxa and (PSC vs ALD)") + 
    guides(fill = "none")

venn

intersection_df <- intersect(list_core$`PSC vs ALD`,list_core$`PSC-associated taxa`)

intersection_df
[1] "k__Bacteria;p__Firmicutes;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__Colidextribacter"          
[2] "k__Bacteria;p__Firmicutes;c__Negativicutes;o__Veillonellales-Selenomonadales;f__Veillonellaceae;g__Dialister"
[3] "k__Bacteria;p__Firmicutes;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Lachnoclostridium"           
4.4.0.0.2 ALD vs HC
4.4.0.0.2.1 linDA
group <- c("HC","ALD post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])
# prepare the data
linda_data <- binomial_prep(colon_genus_tab,
                            colon_genus_taxa_tab,
                            colon_metadata,
                            group, usage="linDA")
Removing 100 ASV(s)
filt_colon_uni_data <- linda_data[[1]]
filt_colon_uni_taxa <- linda_data[[2]]
filt_colon_uni_metadata <- linda_data[[3]]

# fit the model
linda.obj <- linda(filt_colon_uni_data,
                   filt_colon_uni_metadata,
                   formula = '~ Group + (1|Patient)')
0  features are filtered!
The filtered data has  187  samples and  186  features will be tested!
Imputation approach is used.
Fit linear mixed effects models ...
Completed.
linda.output <- linda.obj$output
linda.output <- linda_renaming(linda.output, group)

# save the results
group1 <- paste0(group[1], " vs ","Group",group[2])

for (grp in c(group1)){
  raw_linda_results_genus[[segment]][[grp]] <- 
    rawlinda.df(linda.output,
                grp,
                filt_colon_uni_data,
                filt_colon_uni_taxa)
  
  linda_results_genus[[segment]][[grp]] <- 
    linda.df(linda.output,
             grp,
             filt_colon_uni_data,
             filt_colon_uni_taxa)
}
# volcano plot
volcano <- volcano_plot_linda(linda.output, group1, 
                                taxa_table = filt_colon_uni_taxa) + 
            ggtitle(paste(group,collapse=" vs "))


# see the plot
volcano

4.4.0.0.2.2 MaAsLin2
volcano <- volcano_plot_maaslin(fit_data,filt_colon_uni_taxa) + 
            ggtitle(paste(group[1], "vs", group[2]))

volcano

4.4.0.0.2.3 Group - Intersection
intersection_results <- group_intersection(group, list_intersections, list_venns,
                                           linda.output, fit_data,
                                           raw_linda_results_genus, 
                                           segment = segment,
                                           level=level)

list_intersections <- intersection_results[[1]]
list_venns <- intersection_results[[2]]
venn <- intersection_results[[3]]

# show the results
venn

4.4.0.0.2.4 Basic statistics
uni_df <-  merge(basic_univariate_statistics(linda_data,group),
                 raw_linda_results_genus[[segment]][[group1]],
                 by="SeqID",all=TRUE)
[1] "HC"
[1] "ALD post_ltx"
[1] "HC"
[1] "ALD post_ltx"
uni_df[["final_sig"]] <- uni_df$SeqID %in% list_intersections[[paste(segment,level,comparison_name)]][["SeqID"]]
uni_statistics[[segment]][[paste(level,comparison_name)]] <- uni_df

# for comparison
new_name <- comparison_name
addWorksheet(wb, sheetName = new_name)
writeData(wb, sheet = new_name, uni_df, rowNames=FALSE)
4.4.0.0.2.5 Comparison with PSC-associated taxa
hc_ald_sig_colon <- uni_df %>%
  dplyr::filter(final_sig==TRUE) %>%
  dplyr::select(SeqID,Taxonomy,log2FoldChange)

psc_genus_colon <- psc_genus_colon
list_core <- list()
list_core[["ALD vs HC"]] <- hc_ald_sig_colon$Taxonomy
list_core[["PSC-associated taxa"]] <- psc_genus_colon$Taxonomy

# venn diagram
venn <- ggvenn(list_core, fill_color = c("blue", "red")) + 
    ggtitle("Intersection between PSC-associated taxa and (ALD vs HC)") + 
    guides(fill = "none")

venn

From this Venn diagram, we are interested in EXLUSIVE RIGH JOIN

A <- list_core$`ALD vs HC`
B <- list_core$`PSC-associated taxa`
remaining_psc <- B[!(B %in% A)]

psc_genus_colon %>%
  dplyr::filter(Taxonomy %in% remaining_psc)
              SeqID
1            Rothia
2   Parabacteroides
3  Fusicatenibacter
4 Lachnoclostridium
5  Colidextribacter
6         Dialister
7        Klebsiella
                                                                                                      Taxonomy
1               k__Bacteria;p__Actinobacteriota;c__Actinobacteria;o__Micrococcales;f__Micrococcaceae;g__Rothia
2             k__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Tannerellaceae;g__Parabacteroides
3             k__Bacteria;p__Firmicutes;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Fusicatenibacter
4            k__Bacteria;p__Firmicutes;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Lachnoclostridium
5           k__Bacteria;p__Firmicutes;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__Colidextribacter
6 k__Bacteria;p__Firmicutes;c__Negativicutes;o__Veillonellales-Selenomonadales;f__Veillonellaceae;g__Dialister
7 k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Enterobacteriaceae;g__Klebsiella
  log2FoldChange      p_value         padj
1       1.842842 1.452081e-05 1.953709e-04
2      -3.568149 2.950689e-06 6.238599e-05
3      -1.917547 1.097200e-02 3.455012e-02
4      -1.736481 1.812625e-03 8.383390e-03
5      -1.624469 8.895010e-03 2.991958e-02
6       2.144460 1.055678e-02 3.396528e-02
7       2.288824 1.471023e-03 7.257045e-03
uni_df %>%
  dplyr::filter(Taxonomy %in% remaining_psc)
              SeqID Q1_clr HC Q1_clr ALD post_ltx     Q1_ra HC
1  Colidextribacter  1.386414           1.3243781 0.0006398704
2         Dialister -2.055467          -1.8464644 0.0000000000
3  Fusicatenibacter  2.627958          -0.9536309 0.0024423570
4        Klebsiella -2.173923          -2.1627708 0.0000000000
5 Lachnoclostridium  4.526164           4.3276317 0.0163044634
6   Parabacteroides  3.207395           1.7713357 0.0044359835
7            Rothia -2.019276          -1.6659988 0.0000000000
  Q1_ra ALD post_ltx MEDIAN_clr HC MEDIAN_clr ALD post_ltx MEDIAN_clr_ALL
1       0.0005364459     2.1904872               2.6123898      2.4197662
2       0.0000000000    -1.6466225              -1.5459890     -1.5933847
3       0.0000000000     3.7263000               3.4597865      3.6125117
4       0.0000000000    -1.9014687              -1.7576714     -1.8353611
5       0.0098512164     5.0769310               4.9156184      5.0561821
6       0.0005008488     3.8462676               3.5515723      3.7260301
7       0.0000000000    -0.9038266               0.1388787     -0.2249714
  MEDIAN_ra HC MEDIAN_ra ALD post_ltx MEDIAN_ra_ALL  Q3_clr HC
1 1.624514e-03           0.0018135471  0.0017926884  3.2727209
2 0.000000e+00           0.0000000000  0.0000000000  0.8918002
3 6.821282e-03           0.0042333567  0.0059990771  4.3400312
4 0.000000e+00           0.0000000000  0.0000000000 -1.5486429
5 2.914977e-02           0.0193969639  0.0225328829  5.7097862
6 7.486673e-03           0.0054098774  0.0067624683  4.3896860
7 7.396176e-05           0.0001571464  0.0000896218  0.3132439
  Q3_clr ALD post_ltx     Q3_ra HC Q3_ra ALD post_ltx MEAN_clr HC
1            3.346080 0.0038319946       0.0036286551   2.1263747
2            1.130148 0.0003777954       0.0003012978  -0.7515134
3            4.601093 0.0127660808       0.0130167982   3.4889730
4           -1.279010 0.0000000000       0.0000000000  -1.5998927
5            5.660613 0.0444031574       0.0311998481   5.1168178
6            4.456647 0.0122222926       0.0108266661   3.4580071
7            1.708148 0.0002178330       0.0006215708  -0.6688172
  MEAN_clr ALD post_ltx   MEAN_ra HC MEAN_ra ALD post_ltx Cliffs_delta_clr
1            2.07987983 0.0042530750         0.0026577988       0.05491991
2           -0.30065932 0.0006040873         0.0021484826       0.14187643
3            2.57899858 0.0109515100         0.0103134190      -0.10068650
4           -1.17216554 0.0008302963         0.0019654497       0.15125858
5            5.00529390 0.0390427752         0.0270145037      -0.08489703
6            2.84063312 0.0124980591         0.0096894309      -0.10549199
7            0.09788766 0.0003256949         0.0005838665       0.24965675
  Cliffs_delta_ra     LFC_clr      LFC_ra PREVALENCE_ HC
1     -0.05995423 -0.04649491 -0.67827442     0.88421053
2     -0.44462243  0.45085405  1.83048904     0.28421053
3     -0.15240275 -0.90997442 -0.08660713     0.97894737
4     -0.74691076  0.42772714  1.24316131     0.06315789
5     -0.24942792 -0.11152395 -0.53132143     1.00000000
6     -0.20389016 -0.61737399 -0.36722022     0.91578947
7     -0.01876430  0.76670482  0.84211724     0.50526316
  PREVALENCE_ ALD post_ltx
1                0.8369565
2                0.3043478
3                0.7391304
4                0.1304348
5                1.0000000
6                0.8260870
7                0.5760870
                                                                                                      Taxonomy
1           k__Bacteria;p__Firmicutes;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__Colidextribacter
2 k__Bacteria;p__Firmicutes;c__Negativicutes;o__Veillonellales-Selenomonadales;f__Veillonellaceae;g__Dialister
3             k__Bacteria;p__Firmicutes;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Fusicatenibacter
4 k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Enterobacteriaceae;g__Klebsiella
5            k__Bacteria;p__Firmicutes;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Lachnoclostridium
6             k__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Tannerellaceae;g__Parabacteroides
7               k__Bacteria;p__Actinobacteriota;c__Actinobacteria;o__Micrococcales;f__Micrococcaceae;g__Rothia
  log2FoldChange    p_value       padj final_sig
1     -0.3104290 0.52649237 0.64677677     FALSE
2      0.5034364 0.40186250 0.53774406     FALSE
3     -1.3714286 0.02440421 0.05895042     FALSE
4      0.3653255 0.39248230 0.53238987     FALSE
5     -0.1464704 0.63786636 0.71471773     FALSE
6     -1.0640435 0.05019802 0.11249195     FALSE
7      0.9879995 0.01331926 0.03643209     FALSE
4.4.0.0.3 Visualization

Heatmap visualizing the linDA’s logFoldChange for taxa with p < 0.1.

list_heatmap <- list_intersections[grep(paste(segment,level),
                                  names(list_intersections),value=TRUE)]

p_heatmap_linda <- heatmap_linda(list_heatmap,colon_taxa_tab)
p_heatmap_linda

Dot heatmap

dotheatmap_linda <- dot_heatmap_linda(list_heatmap,
                                      uni_statistics$colon[grepl(level,names(uni_statistics$colon))],
                                      colon_taxa_tab) + xlab("") + ylab("")
min_clr -1.8479 
max_clr 6.028454 
min_log -3.176554 
max_log 2.795394 
dotheatmap_linda

Horizontal bar plot

p_prevalence <- horizontal_barplot(wb,taxa=levels(dotheatmap_linda$data$SeqID))
Using SeqID as id variables
p_prevalence_final <- ggarrange(p_prevalence,
                                ggplot() + theme_minimal(),
                                nrow = 2,heights = c(1,0.115))
p <- ggarrange(dotheatmap_linda + theme(legend.position = "none"),p_prevalence_final,ncol=2,widths = c(1,0.3))

dot_heatmap_colon <- p
dot_heatmap_colon

5 Paper-ready visualizations

Alpha diversity

p_A <- alpha_div_plots$`terminal_ileum Country` +
  ggtitle("Terminal ileum")+
  theme(plot.title = element_text(hjust=0.5,face = "bold",size = 15),
        legend.position = "none")  

p_B <-  alpha_div_plots$`colon Country` +
  ggtitle("Colon") +
  theme(plot.title = element_text(hjust=0.5,face = "bold",size = 15),
        legend.position = "none") 

QALD_alpha <- ggarrange(p_A, ggplot() + theme_minimal(),
                      p_B,
                      nrow=1, ncol=3,widths = c(1,0.1,1))

QALD_alpha

Beta diversity

pca_ti <- pca_plots_list$`terminal_ileum genus custom`  +
  ggtitle("Terminal ileum")+
  theme(plot.title = element_text(hjust=0.5,face = "bold",size = 15),
        legend.position = "none")  


pca_colon <- pca_plots_list$`colon genus custom` + 
  ggtitle("Colon")+
  theme(plot.title = element_text(hjust=0.5,face = "bold",size = 15),
        legend.position = "none")  

genus_QALD_beta <- ggarrange(pca_ti,
                           ggplot() + theme_minimal(),
                           pca_colon,
                           ncol=3,
                           widths = c(1,0.1,1))
genus_QALD_beta

Alpha + Beta diversity

alpha_beta <- ggarrange(QALD_alpha,genus_QALD_beta,
                        ncol = 1,nrow=2,labels = c("A","B"))
alpha_beta

5.1 Supplement

genus_QALD_beta <- ggarrange(genus_QALD_beta,ggplot() + theme_minimal(),ncol=2,
                                widths = c(1,0.15))

genus_QALD_beta

6 Session info

sessionInfo()
R version 4.3.1 (2023-06-16 ucrt)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 11 x64 (build 26100)

Matrix products: default


locale:
[1] LC_COLLATE=Czech_Czechia.utf8  LC_CTYPE=Czech_Czechia.utf8   
[3] LC_MONETARY=Czech_Czechia.utf8 LC_NUMERIC=C                  
[5] LC_TIME=Czech_Czechia.utf8    

time zone: Europe/Prague
tzcode source: internal

attached base packages:
[1] parallel  stats     graphics  grDevices utils     datasets  methods  
[8] base     

other attached packages:
 [1] mice_3.17.0              picante_1.8.2            ape_5.8-1               
 [4] lubridate_1.9.3          forcats_1.0.0            stringr_1.5.1           
 [7] tidyverse_2.0.0          kableExtra_1.4.0         tidyr_1.3.1             
[10] gbm_2.2.2                doParallel_1.0.17        iterators_1.0.14        
[13] foreach_1.5.2            ranger_0.17.0            ggvenn_0.1.15           
[16] Maaslin2_1.16.0          purrr_1.0.2              pROC_1.18.5             
[19] glmnet_4.1-8             MicrobiomeStat_1.2       caret_6.0-94            
[22] openxlsx_4.2.8           magrittr_2.0.3           emmeans_1.11.2-8        
[25] lmerTest_3.1-3           robustlmm_3.3-1          lme4_1.1-37             
[28] Matrix_1.6-5             mgcv_1.9-1               nlme_3.1-164            
[31] pheatmap_1.0.13          reshape2_1.4.4           vegan_2.6-10            
[34] lattice_0.22-6           permute_0.9-8            ggplotify_0.1.2         
[37] ggrepel_0.9.6            ggpubr_0.6.1             MicrobiotaProcess_1.14.1
[40] phyloseq_1.46.0          ggplot2_3.5.2            tibble_3.2.1            
[43] dplyr_1.1.4              cowplot_1.2.0            readr_2.1.5             
[46] igraph_2.1.4             ccrepe_1.38.1            data.table_1.15.4       

loaded via a namespace (and not attached):
  [1] fs_1.6.5                    matrixStats_1.5.0          
  [3] bitops_1.0-9                RColorBrewer_1.1-3         
  [5] numDeriv_2016.8-1.1         tools_4.3.1                
  [7] backports_1.4.1             R6_2.6.1                   
  [9] lazyeval_0.2.2              jomo_2.7-6                 
 [11] rhdf5filters_1.14.1         withr_3.0.2                
 [13] gridExtra_2.3               cli_3.6.2                  
 [15] Biobase_2.62.0              logging_0.10-108           
 [17] biglm_0.9-3                 sandwich_3.1-1             
 [19] labeling_0.4.3              mvtnorm_1.3-3              
 [21] robustbase_0.99-4-1         pbapply_1.7-4              
 [23] systemfonts_1.2.2           yulab.utils_0.2.1          
 [25] svglite_2.1.3               parallelly_1.38.0          
 [27] rstudioapi_0.17.1           generics_0.1.4             
 [29] gridGraphics_0.5-1          shape_1.4.6.1              
 [31] car_3.1-3                   zip_2.3.2                  
 [33] biomformat_1.30.0           S4Vectors_0.40.2           
 [35] abind_1.4-8                 infotheo_1.2.0.1           
 [37] lifecycle_1.0.4             multcomp_1.4-28            
 [39] yaml_2.3.10                 carData_3.0-5              
 [41] SummarizedExperiment_1.32.0 Rtsne_0.17                 
 [43] rhdf5_2.46.1                recipes_1.3.1              
 [45] SparseArray_1.2.4           grid_4.3.1                 
 [47] mitml_0.4-5                 crayon_1.5.3               
 [49] pillar_1.11.0               knitr_1.50                 
 [51] optparse_1.7.5              GenomicRanges_1.54.1       
 [53] statip_0.2.3                boot_1.3-32                
 [55] estimability_1.5.1          future.apply_1.11.3        
 [57] codetools_0.2-20            pan_1.9                    
 [59] glue_1.7.0                  ggfun_0.2.0                
 [61] vctrs_0.6.5                 treeio_1.26.0              
 [63] Rdpack_2.6.4                gtable_0.3.6               
 [65] maditr_0.8.6                gower_1.0.1                
 [67] xfun_0.52                   rbibutils_2.3              
 [69] S4Arrays_1.2.1              prodlim_2024.06.25         
 [71] libcoin_1.0-10              coda_0.19-4.1              
 [73] reformulas_0.4.1            pcaPP_2.0-5                
 [75] modeest_2.4.0               survival_3.5-8             
 [77] timeDate_4041.110           hardhat_1.4.2              
 [79] lava_1.8.1                  statmod_1.5.0              
 [81] TH.data_1.1-4               ipred_0.9-15               
 [83] ggtree_3.10.1               GenomeInfoDb_1.38.8        
 [85] fBasics_4041.97             rpart_4.1.23               
 [87] colorspace_2.1-1            BiocGenerics_0.48.1        
 [89] DBI_1.2.3                   nnet_7.3-19                
 [91] ade4_1.7-23                 tidyselect_1.2.1           
 [93] timeSeries_4041.111         compiler_4.3.1             
 [95] microbiome_1.24.0           xml2_1.3.8                 
 [97] DelayedArray_0.28.0         scales_1.4.0               
 [99] DEoptimR_1.1-4              spatial_7.3-18             
[101] rappdirs_0.3.3              digest_0.6.35              
[103] minqa_1.2.8                 rmarkdown_2.29             
[105] XVector_0.42.0              htmltools_0.5.8.1          
[107] pkgconfig_2.0.3             MatrixGenerics_1.14.0      
[109] stabledist_0.7-2            fastmap_1.2.0              
[111] rlang_1.1.3                 htmlwidgets_1.6.4          
[113] farver_2.1.2                zoo_1.8-13                 
[115] jsonlite_2.0.0              ModelMetrics_1.2.2.2       
[117] RCurl_1.98-1.17             modeltools_0.2-24          
[119] Formula_1.2-5               GenomeInfoDbData_1.2.11    
[121] patchwork_1.3.2             Rhdf5lib_1.24.2            
[123] Rcpp_1.0.12                 ggnewscale_0.5.2           
[125] fastGHQuad_1.0.1            stringi_1.8.3              
[127] ggstar_1.0.4                stable_1.1.6               
[129] zlibbioc_1.48.2             MASS_7.3-60.0.1            
[131] plyr_1.8.9                  listenv_0.9.1              
[133] Biostrings_2.70.3           splines_4.3.1              
[135] hash_2.2.6.3                multtest_2.58.0            
[137] hms_1.1.3                   ggtreeExtra_1.12.0         
[139] ggsignif_0.6.4              stats4_4.3.1               
[141] rmutil_1.1.10               evaluate_1.0.5             
[143] nloptr_2.2.1                tzdb_0.5.0                 
[145] getopt_1.20.4               future_1.34.0              
[147] clue_0.3-66                 coin_1.4-3                 
[149] broom_1.0.9                 xtable_1.8-4               
[151] tidytree_0.4.6              rstatix_0.7.2              
[153] viridisLite_0.4.2           class_7.3-22               
[155] aplot_0.2.8                 IRanges_2.36.0             
[157] cluster_2.1.8.1             timechange_0.3.0           
[159] globals_0.18.0