source("custom_functions.R")Geography-independent mucosal microbiota alterations in primary sclerosing cholangitis persist after liver transplantation
Additional disease control
1 Introduction
PSC vs ALD
In previous analyses, we showed specific mucosal composition in PSC patients which persist after liver transplantation. Here, we added another positive control, to explore if this condition is disease specific, or it can be credited to (liver) disease in general.
Fo this purpose, we conduct the univariate analysis on Czech ALD patients after LTx. By comparing post_LTx PSC and ALD patients, we can check if the proposed PSC-associated bacteria are at least partially overlapping.
If so, this would confirm our previous conclusions that these bacteria are specific for PSC.
Importing libraries and custom functions built for this analysis
2 Data Import
Importing ASV, taxa and metadata tables for both Czech ALD samples samples.
path = "../../../../data/analysis_ready_data/ikem/ALD/"
asv_tab_ALD <- as.data.frame(fread(file.path(path,"asv_table_ALD.tsv"),
check.names = FALSE))
taxa_tab_ALD <- as.data.frame(fread(file.path(path,"taxa_table_ALD.tsv"),
check.names = FALSE))
metadata_ALD <- as.data.frame(fread(file.path(path,"metadata_ALD.tsv"),
check.names = FALSE))Importing previous Czech dataset with PSC patients.
path = "../../../../data/analysis_ready_data/ikem/"
asv_tab_ikem <- as.data.frame(fread(file.path(path,"asv_table_ikem.csv"),
check.names = FALSE))
taxa_tab_ikem <- as.data.frame(fread(file.path(path,"taxa_table_ikem.csv"),
check.names = FALSE))
metadata_ikem <- as.data.frame(fread(file.path(path,"metadata_ikem.csv"),
check.names = FALSE))Importing proposed PSC-associated taxa.
psc_genus_ti <- read.xlsx("../../../results/merged_sites/main_analysis/Q1/univariate_analysis/psc_effect_terminal_ileum.xlsx")
psc_genus_colon <- read.xlsx("../../../results/merged_sites/main_analysis/Q1/univariate_analysis/psc_effect_colon.xlsx")3 Terminal ileum
segment="terminal_ileum"
ileum_data <- merging_data(asv_tab_1=asv_tab_ALD,
asv_tab_2=asv_tab_ikem,
taxa_tab_1=taxa_tab_ALD,
taxa_tab_2=taxa_tab_ikem,
metadata_1=metadata_ALD,
metadata_2=metadata_ikem,
segment="TI",Q="QALD")Removing 630 ASV(s)
Removing 1498 ASV(s)
Merging at ASV level
Finding inconsistencies in taxonomy, trying to keep the ones that have better taxonomy assignment
Removing 223 ASV(s)
ileum_asv_tab <- ileum_data[[1]]
ileum_taxa_tab <- ileum_data[[2]]
ileum_metadata <- ileum_data[[3]]3.1 Filtering
Rules:
sequencing depth > 10000
nearZeroVar() with default settings
Library size
read_counts(ileum_asv_tab, line = c(5000,10000))Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
3.1.1 Sequencing depth
data_filt <- seq_depth_filtering(ileum_asv_tab,
ileum_taxa_tab,
ileum_metadata,
seq_depth_threshold = 10000)Removing 50 ASV(s)
filt_ileum_asv_tab <- data_filt[[1]]; alpha_ileum_asv_tab <- filt_ileum_asv_tab
filt_ileum_taxa_tab <- data_filt[[2]]; alpha_ileum_taxa_tab <- filt_ileum_taxa_tab
filt_ileum_metadata <- data_filt[[3]]; alpha_ileum_metadata <- filt_ileum_metadata
seq_step <- dim(filt_ileum_asv_tab)[1]Library size
read_counts(filt_ileum_asv_tab,line = c(5000,10000))Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
3.1.2 NearZeroVar
data_filt <- nearzerovar_filtering(filt_ileum_asv_tab,
filt_ileum_taxa_tab,
filt_ileum_metadata)
filt_ileum_asv_tab <- data_filt[[1]]
filt_ileum_taxa_tab <- data_filt[[2]]
nearzero_step <- dim(filt_ileum_asv_tab)[1]Library size
read_counts(filt_ileum_asv_tab,line = c(5000,10000))Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
3.1.3 Final Counts
final_counts_filtering(ileum_asv_tab,
filt_ileum_asv_tab,
filt_ileum_metadata,
seq_step, 0, nearzero_step) %>% `colnames<-`("Count") Count
Raw data: ASVs 3982
Raw data: Samples 185
Sequencing depth filt: ASVs 3932
Prevalence filt: ASVs 0
NearZeroVar filt: ASVs 708
Filt data: ASVs 708
Filt data: Samples 181
Filt data: Patients 181
Filt data: Patients.1 0
Filtered samples 4
Matrices TI
healthy 0
non-rPSC 0
rPSC 0
pre_ltx 0
post_ltx 0
ETOH 0
3.2 Alpha diversity
path = "../../../results/merged_sites/main_analysis/QALD/alpha_diversity"Calculation
# Construct MPSE object
alpha_ileum_metadata$Sample <- alpha_ileum_metadata$SampleID
ileum_mpse <- as.MPSE(construct_phyloseq(alpha_ileum_asv_tab,
alpha_ileum_taxa_tab,
alpha_ileum_metadata))
ileum_mpse %<>% mp_rrarefy(raresize = 10000,seed = 123)
# Calculate alpha diversity - rarefied counts
ileum_mpse %<>% mp_cal_alpha(.abundance=RareAbundance, force=TRUE)alpha_div_plots <- list()
# preparing data frame
alpha_data <- data.frame(SampleID=ileum_mpse$Sample.x,
Observe=ileum_mpse$Observe,
Shannon=ileum_mpse$Shannon,
Simpson=ileum_mpse$Simpson,
Pielou=ileum_mpse$Pielou,
Group=ileum_mpse$Group,
Country=ileum_mpse$Country,
Patient=ileum_mpse$Patient)
write.csv(alpha_data,file.path(path,paste0("alpha_indices_",segment,".csv")),
row.names = FALSE)3.2.1 Plots
p_boxplot_alpha <- alpha_diversity_countries(alpha_data,
show_legend = FALSE,
size=1,gap=FALSE,jitter=1.5)Using SampleID, Group, Country as id variables
Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
ℹ Please use the `linewidth` argument instead.
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
Using SampleID, Group, Country as id variables
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
# save the results
alpha_div_plots[[paste(segment,"Country")]] <- p_boxplot_alpha
# see the results
p_boxplot_alpha3.2.2 Linear Model
path = "../../../results/merged_sites/main_analysis/QALD/alpha_diversity"
alpha_data <- read.csv(file.path(path,paste0("alpha_indices_",segment,".csv")))Richness
results_model <- pairwise.lm(formula = "Observe ~ Group",
factors=alpha_data$Group,
data=alpha_data)
# check interaction
if (!is.data.frame(results_model)){
results_model_observe <- results_model[[1]]
results_model_observe_emeans <- results_model[[2]]
} else {
results_model_observe <- results_model
results_model_observe_emeans <- NA
}
# save the results
pc_observed <- list();
pc_observed[[segment]] <- results_model_observe# see the results
knitr::kable(results_model_observe,digits = 3,
caption = "Raw results of linear model of richness estimation.")| Estimate | Std..Error | t.value | Pr…t.. | p.adj | sig | |
|---|---|---|---|---|---|---|
| HC vs GroupALD post_ltx | -16.785 | 9.990 | -1.680 | 0.097 | 0.145 | |
| ALD post_ltx vs GroupPSC post_ltx | -10.564 | 8.950 | -1.180 | 0.240 | 0.240 | |
| HC vs GroupPSC post_ltx | -27.349 | 9.275 | -2.949 | 0.004 | 0.011 | * |
knitr::kable(results_model_observe_emeans,digits = 3,
caption = "Raw results of independent country analysis")Table: Raw results of independent country analysis
Shannon
results_model <- pairwise.lm(formula = "Shannon ~ Group",
factors=alpha_data$Group,
data=alpha_data)
# check interaction
if (!is.data.frame(results_model)){
results_model_shannon <- results_model[[1]]
results_model_shannon_emeans <- results_model[[2]]
} else {
results_model_shannon <- results_model
results_model_shannon_emeans <- NA
}
# save the results
pc_shannon <- list();
pc_shannon[[segment]] <- as.data.frame(results_model_shannon)# see the results
knitr::kable(results_model_shannon,digits = 3,
caption = "Raw results of linear model of Shannon estimation.")| Estimate | Std..Error | t.value | Pr…t.. | p.adj | sig | |
|---|---|---|---|---|---|---|
| HC vs GroupALD post_ltx | -0.026 | 0.110 | -0.236 | 0.814 | 0.814 | |
| ALD post_ltx vs GroupPSC post_ltx | -0.108 | 0.102 | -1.058 | 0.292 | 0.438 | |
| HC vs GroupPSC post_ltx | -0.134 | 0.110 | -1.219 | 0.225 | 0.438 |
knitr::kable(results_model_shannon_emeans,digits = 3,
caption = "Raw results of independent country analysis")Table: Raw results of independent country analysis
Simpson
results_model <- pairwise.lm(formula = "Simpson ~ Group",
factors=alpha_data$Group,
data=alpha_data)
# check interaction
if (!is.data.frame(results_model)){
results_model_simpson <- results_model[[1]]
results_model_simpson_emeans <- results_model[[2]]
} else {
results_model_simpson <- results_model
results_model_simpson_emeans <- NA
}
# save the results
pc_simpson <- list();
pc_simpson[[segment]] <- as.data.frame(results_model_simpson)# see the results
knitr::kable(results_model_simpson,digits = 3,
caption = "Raw results of linear model of Simpson estimation.")| Estimate | Std..Error | t.value | Pr…t.. | p.adj | sig | |
|---|---|---|---|---|---|---|
| HC vs GroupALD post_ltx | 0.006 | 0.010 | 0.587 | 0.559 | 0.572 | |
| ALD post_ltx vs GroupPSC post_ltx | -0.015 | 0.014 | -1.038 | 0.301 | 0.572 | |
| HC vs GroupPSC post_ltx | -0.009 | 0.016 | -0.566 | 0.572 | 0.572 |
knitr::kable(results_model_simpson_emeans,digits = 3,
caption = "Raw results of independent country analysis")Table: Raw results of independent country analysis
Pielou
results_model <- pairwise.lm(formula = "Pielou ~ Group",
factors=alpha_data$Group,
data=alpha_data)
# check interaction
if (!is.data.frame(results_model)){
results_model_pielou <- results_model[[1]]
results_model_pielou_emeans <- results_model[[2]]
} else {
results_model_pielou <- results_model
results_model_pielou_emeans <- NA
}
# save the results
pc_pielou <- list();
pc_pielou[[segment]] <- as.data.frame(results_model_pielou)# see the results
knitr::kable(results_model_pielou,digits = 3,
caption = "Raw results of linear model of Pielou estimation.")| Estimate | Std..Error | t.value | Pr…t.. | p.adj | sig | |
|---|---|---|---|---|---|---|
| HC vs GroupALD post_ltx | 0.012 | 0.016 | 0.726 | 0.470 | 0.705 | |
| ALD post_ltx vs GroupPSC post_ltx | -0.011 | 0.015 | -0.751 | 0.454 | 0.705 | |
| HC vs GroupPSC post_ltx | 0.000 | 0.017 | 0.021 | 0.983 | 0.983 |
knitr::kable(results_model_pielou_emeans,digits = 3,
caption = "Raw results of independent country analysis")Table: Raw results of independent country analysis
3.2.3 Saving results
alpha_list <- list(
Richness=pc_observed[[segment]],
Shannon=pc_shannon[[segment]])
write.xlsx(alpha_list,
file = file.path(path,paste0("alpha_diversity_results_",segment,".xlsx")))3.3 Beta diversity
Calculating Aitchison distance (euclidean distance on clr-transformed data), both at ASV and genus level.
3.3.1 Main analysis
Genus level, Aitchison distance
level="genus"path = "../../../results/merged_sites/main_analysis/QALD/beta_diversity"pairwise_aitchison_raw <- list()
pca_plots_list <- list()Aggregation, filtering
# Aggregation
genus_data <- aggregate_taxa(ileum_asv_tab,
ileum_taxa_tab,
taxonomic_level=level,
names=TRUE)
# Filtration
filt_data <- filtering_steps(genus_data[[1]],
genus_data[[2]],
ileum_metadata,
seq_depth_threshold=10000)
filt_ileum_genus_tab <- filt_data[[1]]
filt_ileum_genus_taxa <- filt_data[[2]]
filt_ileum_metadata <- filt_data[[3]]3.3.1.0.1 PERMANOVA
pairwise_df <- filt_ileum_genus_tab %>% column_to_rownames("SeqID") %>% t()
# main effect
pp_main <- pairwise.adonis(pairwise_df,
filt_ileum_metadata$Group,
sim.method = "robust.aitchison", p.adjust.m="BH")
# tidy the results
pp_factor <- pp_main
cols <- c("pairs","Df","SumsOfSqs", "F.Model","R2","p.value", "p.adjusted", "sig")
colnames(pp_factor) <- cols
# save raw results
pairwise_aitchison_raw[[paste(level, segment)]] <- pp_factor# see the results
knitr::kable(pp_factor,digits = 3,caption = "PERMANOVA, GROUP separation")| pairs | Df | SumsOfSqs | F.Model | R2 | p.value | p.adjusted | sig |
|---|---|---|---|---|---|---|---|
| HC vs ALD post_ltx | 1 | 440.848 | 2.479 | 0.031 | 0.001 | 0.002 | ** |
| ALD post_ltx vs PSC post_ltx | 1 | 312.281 | 1.696 | 0.012 | 0.005 | 0.005 | ** |
| HC vs PSC post_ltx | 1 | 439.421 | 2.454 | 0.018 | 0.001 | 0.002 | ** |
3.3.1.0.2 Plots
PCA
p <- pca_plot_custom(filt_ileum_genus_tab,
filt_ileum_genus_taxa,
filt_ileum_metadata,
show_boxplots = TRUE,
variable = "Group", size=3, show_legend=FALSE)
# save the results
pca_plots_list[[paste(segment,level,"custom")]] <- p
# see the results
p3.3.1.1 Saving results
write.xlsx(pairwise_aitchison_raw[[paste(level, segment)]],
file = file.path(path,
paste0("beta_diversity_results_", segment,".xlsx")))3.4 Univariate Analysis
Similar to previous approaches, we will use two DAA tools - linDA and Maaslin. Only the intersection of sets selected by each tool are shown to minimize false positives. As this analysis can only be conducted on Czech cohort, no interaction effect can be shown, therefore only Group effect is investigated.
This analysis is performed at genus level only.
level="genus"
# needed paths
path = "../../../results/merged_sites/main_analysis/QALD/univariate_analysis"
path_maaslin=file.path("../../../intermediate_files/maaslin/QALD",level)# variables
raw_linda_results_genus <- list();
raw_linda_results_genus[[segment]] <- list()
linda_results_genus <- list();
linda_results_genus[[segment]] <- list()
# country and interaction problems
list_country_union <- list()
list_intersections <- list()
list_venns <- list()
uni_statistics <- list()
# workbook for final df
wb <- createWorkbook()Aggregate taxa
genus_data <- aggregate_taxa(ileum_asv_tab,
ileum_taxa_tab,
taxonomic_level = level)
ileum_genus_tab <- genus_data[[1]]
ileum_genus_taxa_tab <- genus_data[[2]]
ileum_genus_asv_taxa_tab <- create_asv_taxa_table(ileum_genus_tab,
ileum_genus_taxa_tab)3.4.1 PSC vs ALD
We can assume, that the differentially abundant taxa between these two groups are caused by PSC disease and other factors, like liver cirrhosis and overall bad clinical condition.
group <- c("ALD post_ltx","PSC post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])3.4.1.1 linDA
# prepare the data
linda_data <- binomial_prep(ileum_genus_tab,
ileum_genus_taxa_tab,
ileum_metadata,
group, usage="linDA")Removing 33 ASV(s)
filt_ileum_uni_data <- linda_data[[1]]
filt_ileum_uni_taxa <- linda_data[[2]]
filt_ileum_uni_metadata <- linda_data[[3]]
# fit the model
linda.obj <- linda(filt_ileum_uni_data,
filt_ileum_uni_metadata,
formula = '~ Group')0 features are filtered!
The filtered data has 144 samples and 200 features will be tested!
Pseudo-count approach is used.
Fit linear models ...
Completed.
linda.output <- linda.obj$output
linda.output <- linda_renaming(linda.output, group)
# save the results
group1 <- paste0(group[1], " vs ","Group",group[2])
for (grp in c(group1)){
raw_linda_results_genus[[segment]][[grp]] <-
rawlinda.df(linda.output,
grp,
filt_ileum_uni_data,
filt_ileum_uni_taxa)
linda_results_genus[[segment]][[grp]] <-
linda.df(linda.output,
grp,
filt_ileum_uni_data,
filt_ileum_uni_taxa)
}# volcano plots
volcano <- volcano_plot_linda(linda.output,
group1,
taxa_table = filt_ileum_uni_taxa) +
ggtitle(comparison_title)
volcano3.4.1.2 MaAsLin2
volcano <- volcano_plot_maaslin(fit_data,filt_ileum_uni_taxa) +
ggtitle(comparison_title)
volcano3.4.1.3 Group - Intersection
intersection_results <- group_intersection(group, list_intersections, list_venns,
linda.output, fit_data,
raw_linda_results_genus,
segment = segment,
level=level)
list_intersections <- intersection_results[[1]]
list_venns <- intersection_results[[2]]
venn <- intersection_results[[3]]
# show the results
venn3.4.1.4 Basic statistics
uni_df <- merge(basic_univariate_statistics(linda_data,group),
raw_linda_results_genus[[segment]][[group1]],
by="SeqID",all=TRUE)[1] "ALD post_ltx"
[1] "PSC post_ltx"
[1] "ALD post_ltx"
[1] "PSC post_ltx"
uni_df[["final_sig"]] <- uni_df$SeqID %in% list_intersections[[paste(segment,level,comparison_name)]][["SeqID"]]
uni_statistics[[segment]][[paste(level,comparison_name)]] <- uni_df
# for comparison
new_name <- comparison_name
addWorksheet(wb, sheetName = new_name)
writeData(wb, sheet = new_name, uni_df, rowNames=FALSE)3.4.1.5 Comparison with PSC-associated taxa
psc_ald_sig_ti <- uni_df %>%
dplyr::filter(final_sig==TRUE) %>%
dplyr::select(SeqID,Taxonomy,log2FoldChange)
psc_genus_ti <- psc_genus_tilist_core <- list()
list_core[["PSC vs ALD"]] <- psc_ald_sig_ti$Taxonomy
list_core[["PSC-associated taxa"]] <- psc_genus_ti$Taxonomy
# venn diagram
venn <- ggvenn(list_core, fill_color = c("blue", "red")) +
ggtitle("Intersection between PSC-associated taxa and (PSC vs ALD)") +
guides(fill = "none")
vennintersection_df <- intersect(list_core$`PSC vs ALD`,list_core$`PSC-associated taxa`)
intersection_dfcharacter(0)
3.4.2 ALD vs HC
We can assume, that the differentially abundant taxa between these two groups are caused by PSC disease and other factors, like liver cirrhosis and overall bad clinical condition.
group <- c("HC","ALD post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])3.4.2.1 linDA
# prepare the data
linda_data <- binomial_prep(ileum_genus_tab,
ileum_genus_taxa_tab,
ileum_metadata,
group, usage="linDA")Removing 82 ASV(s)
filt_ileum_uni_data <- linda_data[[1]]
filt_ileum_uni_taxa <- linda_data[[2]]
filt_ileum_uni_metadata <- linda_data[[3]]
# fit the model
linda.obj <- linda(filt_ileum_uni_data,
filt_ileum_uni_metadata,
formula = '~ Group')0 features are filtered!
The filtered data has 79 samples and 216 features will be tested!
Pseudo-count approach is used.
Fit linear models ...
Completed.
linda.output <- linda.obj$output
linda.output <- linda_renaming(linda.output, group)
# save the results
group1 <- paste0(group[1], " vs ","Group",group[2])
for (grp in c(group1)){
raw_linda_results_genus[[segment]][[grp]] <-
rawlinda.df(linda.output,
grp,
filt_ileum_uni_data,
filt_ileum_uni_taxa)
linda_results_genus[[segment]][[grp]] <-
linda.df(linda.output,
grp,
filt_ileum_uni_data,
filt_ileum_uni_taxa)
}# volcano plots
volcano <- volcano_plot_linda(linda.output,
group1,
taxa_table = filt_ileum_uni_taxa) +
ggtitle(comparison_title)
volcano3.4.2.2 MaAsLin2
volcano <- volcano_plot_maaslin(fit_data,filt_ileum_uni_taxa) +
ggtitle(comparison_title)
volcano3.4.2.3 Group - Intersection
intersection_results <- group_intersection(group, list_intersections, list_venns,
linda.output, fit_data,
raw_linda_results_genus,
segment = segment,
level=level)
list_intersections <- intersection_results[[1]]
list_venns <- intersection_results[[2]]
venn <- intersection_results[[3]]
# show the results
venn3.4.2.4 Basic statistics
uni_df <- merge(basic_univariate_statistics(linda_data,group),
raw_linda_results_genus[[segment]][[group1]],
by="SeqID",all=TRUE)[1] "HC"
[1] "ALD post_ltx"
[1] "HC"
[1] "ALD post_ltx"
uni_df[["final_sig"]] <- uni_df$SeqID %in% list_intersections[[paste(segment,level,comparison_name)]][["SeqID"]]
uni_statistics[[segment]][[paste(level,comparison_name)]] <- uni_df
# for comparison
new_name <- comparison_name
addWorksheet(wb, sheetName = new_name)
writeData(wb, sheet = new_name, uni_df, rowNames=FALSE)3.4.2.5 Comparison with PSC-associated taxa
hc_ald_sig_ti <- uni_df %>%
dplyr::filter(final_sig==TRUE) %>%
dplyr::select(SeqID,Taxonomy,log2FoldChange)
psc_genus_ti <- psc_genus_tilist_core <- list()
list_core[["ALD vs HC"]] <- hc_ald_sig_ti$Taxonomy
list_core[["PSC-associated taxa"]] <- psc_genus_ti$Taxonomy
# venn diagram
venn <- ggvenn(list_core, fill_color = c("blue", "red")) +
ggtitle("Intersection between PSC-associated taxa and (ALD vs HC)") +
guides(fill = "none")
vennFrom this Venn diagram, we are interested in EXLUSIVE RIGH JOIN
A <- list_core$`ALD vs HC`
B <- list_core$`PSC-associated taxa`
remaining_psc <- B[!(B %in% A)]
psc_genus_ti %>%
dplyr::filter(Taxonomy %in% remaining_psc) SeqID
1 Enterorhabdus
2 Barnesiella
3 Butyricimonas
4 Parabacteroides
5 Holdemania
6 Fusicatenibacter
7 Lachnoclostridium
8 Lachnospiraceae_FCS020_group
9 Enterococcus
Taxonomy
1 k__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Eggerthellaceae;g__Enterorhabdus
2 k__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Barnesiellaceae;g__Barnesiella
3 k__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Marinifilaceae;g__Butyricimonas
4 k__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Tannerellaceae;g__Parabacteroides
5 k__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Holdemania
6 k__Bacteria;p__Firmicutes;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Fusicatenibacter
7 k__Bacteria;p__Firmicutes;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Lachnoclostridium
8 k__Bacteria;p__Firmicutes;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Lachnospiraceae_FCS020_group
9 k__Bacteria;p__Firmicutes;c__Bacilli;o__Lactobacillales;f__Enterococcaceae;g__Enterococcus
log2FoldChange p_value padj
1 -1.050549 1.087023e-02 4.545734e-02
2 -3.177161 1.263665e-03 7.750481e-03
3 -2.900425 6.571752e-05 9.301556e-04
4 -3.418185 1.366753e-05 2.794251e-04
5 -1.213660 1.082333e-02 4.545734e-02
6 -2.864295 5.224510e-04 4.336862e-03
7 -1.781011 1.555333e-03 9.231654e-03
8 -2.231945 8.971276e-04 6.113758e-03
9 4.892498 4.554602e-10 8.380467e-08
uni_df %>%
dplyr::filter(Taxonomy %in% remaining_psc) SeqID Q1_clr HC Q1_clr ALD post_ltx Q1_ra HC
1 Barnesiella 2.2306841 -1.154492 0.0008978388
2 Butyricimonas 0.4732196 -1.336010 0.0001128859
3 Enterococcus -1.8330261 -1.727812 0.0000000000
4 Enterorhabdus -1.5823853 -1.727812 0.0000000000
5 Fusicatenibacter 2.4909696 -0.227097 0.0015781761
6 Holdemania -1.5163636 -1.717373 0.0000000000
7 Lachnoclostridium 4.5353883 4.487578 0.0166991131
8 Lachnospiraceae_FCS020_group 0.3961494 -1.526367 0.0001974724
9 Parabacteroides 3.4409794 0.750685 0.0033754974
Q1_ra ALD post_ltx MEDIAN_clr HC MEDIAN_clr ALD post_ltx MEDIAN_clr_ALL
1 0.0000000000 3.390671 2.2487867 3.0290037
2 0.0000000000 1.642021 0.3369299 1.1642191
3 0.0000000000 -1.582385 -1.3920182 -1.5512393
4 0.0000000000 -1.233676 -1.3572043 -1.3225254
5 0.0001250234 4.164524 3.3132950 3.8791955
6 0.0000000000 -0.860118 -1.3232237 -1.2138890
7 0.0124473517 5.396674 5.1523001 5.1893387
8 0.0000000000 1.764065 -0.9196969 0.8750241
9 0.0002511326 3.824022 3.5017494 3.7914827
MEDIAN_ra HC MEDIAN_ra ALD post_ltx MEDIAN_ra_ALL Q3_clr HC
1 0.0046355656 0.0016150245 0.0024345709 4.6356379
2 0.0007128310 0.0002592059 0.0004303749 2.7339753
3 0.0000000000 0.0000000000 0.0000000000 -1.2896879
4 0.0000000000 0.0000000000 0.0000000000 0.8031675
5 0.0082828283 0.0033676475 0.0068469405 4.7182217
6 0.0000000000 0.0000000000 0.0000000000 0.9174882
7 0.0297397770 0.0211809483 0.0230078563 5.7811458
8 0.0008080808 0.0000000000 0.0002824175 2.3894697
9 0.0073748903 0.0045099334 0.0067575649 4.5215073
Q3_clr ALD post_ltx Q3_ra HC Q3_ra ALD post_ltx MEAN_clr HC
1 4.0842760 0.0139621471 8.852401e-03 2.9252004
2 1.8752309 0.0021949078 6.644303e-04 1.3627535
3 0.2506441 0.0000000000 5.779679e-05 -1.3948854
4 -1.0928405 0.0003922887 0.000000e+00 -0.4049738
5 4.4840250 0.0139441116 1.207321e-02 3.7152372
6 -0.7565098 0.0002962085 0.000000e+00 -0.2465916
7 5.7684635 0.0450843327 3.322824e-02 5.2855780
8 1.8150207 0.0016318205 7.561644e-04 1.2077527
9 4.5894257 0.0130519300 1.339695e-02 3.4087184
MEAN_clr ALD post_ltx MEAN_ra HC MEAN_ra ALD post_ltx Cliffs_delta_clr
1 1.8160679 0.0093336646 5.606783e-03 -0.26254826
2 0.2876302 0.0014975868 7.910501e-04 -0.34362934
3 -0.6655413 0.0003453243 3.431538e-04 0.21364221
4 -1.1217975 0.0003712743 1.037741e-04 -0.23809524
5 2.6386800 0.0116677646 1.006397e-02 -0.18275418
6 -0.9965843 0.0002566601 8.634005e-05 -0.27927928
7 5.0817617 0.0362248096 3.235612e-02 -0.07335907
8 0.1262936 0.0011968063 8.176818e-04 -0.33075933
9 2.7475073 0.0105718892 1.109219e-02 -0.11068211
Cliffs_delta_ra LFC_clr LFC_ra PREVALENCE_ HC
1 -0.3140283 -1.1091326 -0.735270444 0.83783784
2 -0.4646075 -1.0751233 -0.920798696 0.75675676
3 -0.4916345 0.7293441 -0.009096507 0.05405405
4 -0.7451737 -0.7168237 -1.839038396 0.37837838
5 -0.1969112 -1.0765572 -0.213328037 0.97297297
6 -0.6628057 -0.7499927 -1.571757406 0.48648649
7 -0.1724582 -0.2038164 -0.162939515 1.00000000
8 -0.4478764 -1.0814591 -0.549578180 0.75675676
9 -0.1557272 -0.6612111 0.069310834 0.89189189
PREVALENCE_ ALD post_ltx
1 0.6666667
2 0.5238095
3 0.2619048
4 0.1666667
5 0.7380952
6 0.2380952
7 0.9761905
8 0.4523810
9 0.7857143
Taxonomy
1 k__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Barnesiellaceae;g__Barnesiella
2 k__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Marinifilaceae;g__Butyricimonas
3 k__Bacteria;p__Firmicutes;c__Bacilli;o__Lactobacillales;f__Enterococcaceae;g__Enterococcus
4 k__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Eggerthellaceae;g__Enterorhabdus
5 k__Bacteria;p__Firmicutes;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Fusicatenibacter
6 k__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Holdemania
7 k__Bacteria;p__Firmicutes;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Lachnoclostridium
8 k__Bacteria;p__Firmicutes;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Lachnospiraceae_FCS020_group
9 k__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Tannerellaceae;g__Parabacteroides
log2FoldChange p_value padj final_sig
1 -1.9677889 0.011973662 0.043835781 FALSE
2 -1.9248057 0.001377080 0.010256872 FALSE
3 0.7455791 0.092817775 0.213283399 FALSE
4 -1.3801647 0.002025050 0.011821911 FALSE
5 -1.9111605 0.010528585 0.041705672 FALSE
6 -1.4112402 0.001166764 0.009693115 FALSE
7 -0.6520372 0.101176093 0.223000367 FALSE
8 -1.9230978 0.001758570 0.011530669 FALSE
9 -1.3370097 0.079717532 0.191645059 FALSE
3.4.2.6 Visualization
Heatmap visualizing the linDA’s logFoldChange for taxa with p < 0.1.
list_heatmap <- list_intersections[grep(paste(segment,level),
names(list_intersections),value=TRUE)]
p_heatmap_linda <- heatmap_linda(list_heatmap,ileum_taxa_tab)
p_heatmap_lindaDot heatmap
dotheatmap_linda <- dot_heatmap_linda(list_heatmap,
uni_statistics$terminal_ileum[grepl(level,names(uni_statistics$terminal_ileum))],
ileum_taxa_tab) + xlab("") + ylab("")min_clr -1.551239
max_clr 6.219872
min_log -2.921954
max_log 2.390885
dotheatmap_lindaHorizontal bar plot
p_prevalence <- horizontal_barplot(wb,taxa=levels(dotheatmap_linda$data$SeqID))Using SeqID as id variables
p_prevalence_final <- ggarrange(p_prevalence,
ggplot() + theme_minimal(),
nrow = 2,heights = c(1,0.15))Warning: Removed 1 row containing missing values or values outside the scale range
(`geom_bar()`).
p <- ggarrange(dotheatmap_linda + theme(legend.position = "none"),
p_prevalence_final,
ncol=2,widths = c(0.8,0.3))Warning: Removed 17 rows containing missing values or values outside the scale range
(`geom_point()`).
Warning: Removed 17 rows containing missing values or values outside the scale range
(`geom_text()`).
pdot_heatmap_ileum <- p4 Data Analysis - Colon
segment="colon"
colon_data <- merging_data(asv_tab_1=asv_tab_ALD,
asv_tab_2=asv_tab_ikem,
taxa_tab_1=taxa_tab_ALD,
taxa_tab_2=taxa_tab_ikem,
metadata_1=metadata_ALD,
metadata_2=metadata_ikem,
segment=segment,Q="QALD")Removing 158 ASV(s)
Removing 739 ASV(s)
Merging at ASV level
Finding inconsistencies in taxonomy, trying to keep the ones that have better taxonomy assignment
Removing 207 ASV(s)
colon_asv_tab <- colon_data[[1]]
colon_taxa_tab <- colon_data[[2]]
colon_metadata <- colon_data[[3]]4.1 Filtering
Rules: - sequencing depth > 10000 - nearZeroVar() with default settings
Library size
read_counts(colon_asv_tab, line = c(5000,10000))Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
4.1.1 Sequencing depth
data_filt <- seq_depth_filtering(colon_asv_tab,
colon_taxa_tab,
colon_metadata,
seq_depth_threshold = 10000)Removing 4 ASV(s)
filt_colon_asv_tab <- data_filt[[1]]; alpha_colon_asv_tab <- filt_colon_asv_tab
filt_colon_taxa_tab <- data_filt[[2]]; alpha_colon_taxa_tab <- filt_colon_taxa_tab
filt_colon_metadata <- data_filt[[3]]; alpha_colon_metadata <- filt_colon_metadata
seq_step <- dim(filt_colon_asv_tab)[1]Library size
read_counts(filt_colon_asv_tab,line = c(10000))Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
4.1.2 NearZeroVar
data_filt <- nearzerovar_filtering(filt_colon_asv_tab,
filt_colon_taxa_tab,
filt_colon_metadata)
filt_colon_asv_tab <- data_filt[[1]]
filt_colon_taxa_tab <- data_filt[[2]]
nearzero_step <- dim(filt_colon_asv_tab)[1]Library size
read_counts(filt_colon_asv_tab,line = c(5000,10000))Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
Check zero depth
data_filt <- check_zero_depth(filt_colon_asv_tab,
filt_colon_taxa_tab,
filt_colon_metadata)
filt_colon_asv_tab <- data_filt[[1]];
filt_colon_taxa_tab <- data_filt[[2]];
filt_colon_metadata <- data_filt[[3]]; Library size
read_counts(filt_colon_asv_tab,line = c(5000,10000))Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
4.1.3 Final Counts
final_counts_filtering(colon_asv_tab,
filt_colon_asv_tab,
filt_colon_metadata,
seq_step, 0, nearzero_step) V1
Raw data: ASVs 4893
Raw data: Samples 400
Sequencing depth filt: ASVs 4889
Prevalence filt: ASVs 0
NearZeroVar filt: ASVs 514
Filt data: ASVs 514
Filt data: Samples 395
Filt data: Patients 210
Filt data: Patients.1 0
Filtered samples 5
Matrices Cecum;Rectum;CD
healthy 0
non-rPSC 0
rPSC 0
pre_ltx 0
post_ltx 0
ETOH 0
4.2 Alpha diversity
Calculating Richness, Shannon, Simpson, Pielou indexes on raw (unfiltered) rarefied data. Samples with sequencing depth < 10000 were excluded. Testing by linear mixed-effect model.
path = "../../../results/merged_sites/main_analysis/QALD/alpha_diversity"Calculation
# Construct MPSE object
alpha_colon_metadata$Sample <- alpha_colon_metadata$SampleID
colon_mpse <- as.MPSE(construct_phyloseq(alpha_colon_asv_tab,
alpha_colon_taxa_tab,
alpha_colon_metadata))
colon_mpse %<>% mp_rrarefy(raresize = 10000,seed = 123)
# Calculate alpha diversity - rarefied counts
colon_mpse %<>% mp_cal_alpha(.abundance=RareAbundance, force=TRUE)alpha_data <- data.frame(SampleID=colon_mpse$Sample.x,
Observe=colon_mpse$Observe,
Shannon=colon_mpse$Shannon,
Simpson=colon_mpse$Simpson,
Pielou=colon_mpse$Pielou,
Group=colon_mpse$Group,
Country=colon_mpse$Country,
Patient=colon_mpse$Patient)
write.csv(alpha_data,file.path(path,paste0("alpha_indices_",segment,".csv")),
row.names = FALSE)4.2.1 Plots
p_boxplot_alpha <- alpha_diversity_countries(alpha_data,
show_legend = FALSE,
size=1,gap=TRUE,jitter = 1.5)Using SampleID, Group, Country as id variables
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
Using SampleID, Group, Country as id variables
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
# save the results
alpha_div_plots[[paste(segment,"Country")]] <- p_boxplot_alpha
# see the results
p_boxplot_alpha4.2.2 Linear Model
path = "../../../results/merged_sites/main_analysis/QALD/alpha_diversity"
alpha_data <- read.csv(file.path(path,paste0("alpha_indices_",segment,".csv")))Richness
results_model <- pairwise.lmer(
formula = "Observe ~ Group + (1|Patient)",
factors=alpha_data$Group,
data=alpha_data)
# check interaction
if (!is.data.frame(results_model)){
results_model_observe <- results_model[[1]]
results_model_observe_detailed <- results_model[[2]]
} else {
results_model_observe <- results_model
results_model_observe_detailed <- NA
}
# save the results
pc_observed[[segment]] <- results_model_observe# see the results
knitr::kable(results_model_observe,digits = 3,
caption = "Raw results of linear model of richness estimation.")| Estimate | Std..Error | df | t.value | Pr…t.. | p.adj | sig | |
|---|---|---|---|---|---|---|---|
| HC vs GroupALD post_ltx | -25.783 | 8.899 | 101.256 | -2.897 | 0.005 | 0.007 | ** |
| ALD post_ltx vs GroupPSC post_ltx | -1.333 | 8.824 | 154.989 | -0.151 | 0.880 | 0.880 | |
| HC vs GroupPSC post_ltx | -27.170 | 8.556 | 161.321 | -3.175 | 0.002 | 0.005 | ** |
knitr::kable(results_model_observe_detailed,digits = 3,
caption = "Raw results of independent country analysis")Table: Raw results of independent country analysis
Shannon
results_model <- pairwise.lmer(
formula = "Shannon ~ Group + (1|Patient)",
factors=alpha_data$Group,
data=alpha_data)
# check interaction
if (!is.data.frame(results_model)){
results_model_shannon <- results_model[[1]]
results_model_shannon_detailed <- results_model[[2]]
} else {
results_model_shannon <- results_model
results_model_shannon_detailed <- NA
}
# save the results
pc_shannon[[segment]] <- as.data.frame(results_model_shannon)# see the results
knitr::kable(results_model_shannon,digits = 3,
caption = "Raw results of linear model of Shannon estimation.")| Estimate | Std..Error | df | t.value | Pr…t.. | p.adj | sig | |
|---|---|---|---|---|---|---|---|
| HC vs GroupALD post_ltx | -0.187 | 0.083 | 101.292 | -2.251 | 0.027 | 0.047 | * |
| ALD post_ltx vs GroupPSC post_ltx | -0.028 | 0.108 | 152.383 | -0.257 | 0.797 | 0.797 | |
| HC vs GroupPSC post_ltx | -0.218 | 0.100 | 158.220 | -2.172 | 0.031 | 0.047 | * |
knitr::kable(results_model_shannon_detailed,digits = 3,
caption = "Raw results of independent country analysis")Table: Raw results of independent country analysis
Simpson
results_model <- pairwise.lmer(
formula = "Simpson ~ Group + (1|Patient)",
factors=alpha_data$Group,
data=alpha_data)
# check interaction
if (!is.data.frame(results_model)){
results_model_simpson <- results_model[[1]]
results_model_simpson_detailed <- results_model[[2]]
} else {
results_model_simpson <- results_model
results_model_simpson_detailed <- NA
}
# save the results
pc_simpson[[segment]] <- as.data.frame(results_model_simpson)# see the results
knitr::kable(results_model_simpson,digits = 3,
caption = "Raw results of linear model of Simpson estimation.")| Estimate | Std..Error | df | t.value | Pr…t.. | p.adj | sig | |
|---|---|---|---|---|---|---|---|
| HC vs GroupALD post_ltx | -0.007 | 0.006 | 100.855 | -1.206 | 0.231 | 0.284 | |
| ALD post_ltx vs GroupPSC post_ltx | -0.019 | 0.018 | 152.512 | -1.074 | 0.284 | 0.284 | |
| HC vs GroupPSC post_ltx | -0.027 | 0.017 | 157.717 | -1.607 | 0.110 | 0.284 |
knitr::kable(results_model_simpson_detailed,digits = 3,
caption = "Raw results of independent country analysis")Table: Raw results of independent country analysis
Pielou
results_model <- pairwise.lmer(
formula = "Pielou ~ Group + (1|Patient)",
factors=alpha_data$Group,
data=alpha_data)
# check interaction
if (!is.data.frame(results_model)){
results_model_pielou <- results_model[[1]]
results_model_pielou_detailed <- results_model[[2]]
} else {
results_model_pielou <- results_model
results_model_pielou_detailed <- NA
}
# save the results
pc_pielou[[segment]] <- as.data.frame(results_model_pielou)# see the results
knitr::kable(results_model_pielou,digits = 3,
caption = "Raw results of linear model of Pielou estimation.")| Estimate | Std..Error | df | t.value | Pr…t.. | p.adj | sig | |
|---|---|---|---|---|---|---|---|
| HC vs GroupALD post_ltx | -0.013 | 0.011 | 101.108 | -1.182 | 0.240 | 0.401 | |
| ALD post_ltx vs GroupPSC post_ltx | -0.004 | 0.017 | 151.723 | -0.244 | 0.807 | 0.807 | |
| HC vs GroupPSC post_ltx | -0.018 | 0.016 | 157.456 | -1.113 | 0.267 | 0.401 |
knitr::kable(results_model_pielou_detailed,digits = 3,
caption = "Raw results of independent country analysis")Table: Raw results of independent country analysis
4.2.3 Saving results
alpha_list <- list(
Richness=pc_observed[[segment]] %>% rownames_to_column("Comparison"),
Shannon=pc_shannon[[segment]] %>% rownames_to_column("Comparison"),
Simpson=pc_simpson[[segment]] %>% rownames_to_column("Comparison"),
Pielou=pc_pielou[[segment]] %>% rownames_to_column("Comparison"))
write.xlsx(alpha_list,
file = file.path(path,paste0("alpha_diversity_results_",segment,".xlsx")))4.3 Beta diversity
Calculating Aitchison distance (euclidean distance on clr-transformed data) at genus level (main analysis). In supplements, there are also bray-curtis and Jaccard distance at ASV and genus levels. Testing by PERMANOVA.
4.3.1 Main analysis
Genus level, Aitchison distance
level="genus"path = "../../../results/merged_sites/main_analysis/QALD/beta_diversity"Aggregation, filtering
genus_data <- aggregate_taxa(colon_asv_tab,
colon_taxa_tab,
taxonomic_level=level,
names=TRUE)
filt_data <- filtering_steps(genus_data[[1]],
genus_data[[2]],
colon_metadata,
seq_depth_threshold=10000)
filt_colon_genus_tab <- filt_data[[1]]
filt_colon_genus_taxa <- filt_data[[2]]
filt_colon_genus_metadata <- filt_data[[3]]4.3.2 PERMANOVA
pairwise_df <- filt_colon_genus_tab %>% column_to_rownames("SeqID") %>% t()
# main effect
pp_main <- pairwise.adonis(pairwise_df,filt_colon_genus_metadata$Group,
patients = filt_colon_genus_metadata$Patient,
sim.method = "robust.aitchison", p.adjust.m="BH")
# tidy the results
pp_factor <- pp_main
cols <- c("pairs","Df","SumsOfSqs", "F.Model","R2","p.value", "p.adjusted", "sig")
colnames(pp_factor) <- cols
# save raw results
pairwise_aitchison_raw[[paste(level, segment)]] <-rbind(pp_factor
)# see the results
knitr::kable(pp_factor,digits = 3,caption = "PERMANOVA, GROUP separation")| pairs | Df | SumsOfSqs | F.Model | R2 | p.value | p.adjusted | sig |
|---|---|---|---|---|---|---|---|
| HC vs ALD post_ltx | 1 | 912.451 | 5.183 | 0.027 | 0.001 | 0.002 | ** |
| ALD post_ltx vs PSC post_ltx | 1 | 477.932 | 2.675 | 0.009 | 0.040 | 0.040 | * |
| HC vs PSC post_ltx | 1 | 1115.245 | 6.365 | 0.021 | 0.001 | 0.002 | ** |
4.3.2.0.1 Plots
p <- pca_plot_custom(filt_colon_genus_tab,
filt_colon_genus_taxa,
filt_colon_genus_metadata,
show_boxplots = TRUE,
variable = "Group", size=2,
show_legend= FALSE)
# save the results
pca_plots_list[[paste(segment,level,"custom")]] <- p
# see the results
p4.3.2.1 Saving results
write.xlsx(pairwise_aitchison_raw[[paste(level, segment)]],
file = file.path(path,
paste0("beta_diversity_results_", segment,".xlsx")))4.4 Univariate Analysis
level="genus"# needed paths
path = "../../../results/merged_sites/main_analysis/QALD/univariate_analysis"
path_maaslin=file.path("../../../intermediate_files/maaslin/QALD",level)
# variables
raw_linda_results_genus[[segment]] <- list()
linda_results_genus[[segment]] <- list()
# country and interaction problems
list_country_union <- list()
list_venns <- list()
# workbook for final df
wb <- createWorkbook()level="genus"Aggregate taxa
genus_data <- aggregate_taxa(colon_asv_tab,
colon_taxa_tab,
taxonomic_level = level)
colon_genus_tab <- genus_data[[1]]
colon_genus_taxa_tab <- genus_data[[2]]
colon_genus_asv_taxa_tab <- create_asv_taxa_table(colon_genus_tab,
colon_genus_taxa_tab)4.4.0.0.1 PSC vs ALD
4.4.0.0.1.1 linDA
group <- c("ALD post_ltx","PSC post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])# prepare the data
linda_data <- binomial_prep(colon_genus_tab,
colon_genus_taxa_tab,
colon_metadata,
group, usage="linDA")Removing 12 ASV(s)
filt_colon_uni_data <- linda_data[[1]]
filt_colon_uni_taxa <- linda_data[[2]]
filt_colon_uni_metadata <- linda_data[[3]]
# fit the model
linda.obj <- linda(filt_colon_uni_data,
filt_colon_uni_metadata,
formula = '~ Group + (1|Patient)')0 features are filtered!
The filtered data has 301 samples and 170 features will be tested!
Imputation approach is used.
Fit linear mixed effects models ...
Completed.
linda.output <- linda.obj$output
linda.output <- linda_renaming(linda.output, group)
# save the results
group1 <- paste0(group[1], " vs ","Group",group[2])
for (grp in c(group1)){
raw_linda_results_genus[[segment]][[grp]] <-
rawlinda.df(linda.output,
grp,
filt_colon_uni_data,
filt_colon_uni_taxa)
linda_results_genus[[segment]][[grp]] <-
linda.df(linda.output,
grp,
filt_colon_uni_data,
filt_colon_uni_taxa)
}# volcano plot
volcano <- volcano_plot_linda(linda.output, group1,
taxa_table = filt_colon_uni_taxa) +
ggtitle(paste(group,collapse=" vs "))
# see the plot
volcano4.4.0.0.1.2 MaAsLin2
volcano <- volcano_plot_maaslin(fit_data,filt_colon_uni_taxa) +
ggtitle(paste(group[1], "vs", group[2]))
volcano4.4.0.0.1.3 Group - Intersection
intersection_results <- group_intersection(group, list_intersections, list_venns,
linda.output, fit_data,
raw_linda_results_genus,
segment = segment,
level=level)
list_intersections <- intersection_results[[1]]
list_venns <- intersection_results[[2]]
venn <- intersection_results[[3]]
# show the results
venn4.4.0.0.1.4 Basic statistics
uni_df <- merge(basic_univariate_statistics(linda_data,group),
raw_linda_results_genus[[segment]][[group1]],
by="SeqID",all=TRUE)[1] "ALD post_ltx"
[1] "PSC post_ltx"
[1] "ALD post_ltx"
[1] "PSC post_ltx"
uni_df[["final_sig"]] <- uni_df$SeqID %in% list_intersections[[paste(segment,level,comparison_name)]][["SeqID"]]
uni_statistics[[segment]][[paste(level,comparison_name)]] <- uni_df
# for comparison
new_name <- comparison_name
addWorksheet(wb, sheetName = new_name)
writeData(wb, sheet = new_name, uni_df, rowNames=FALSE)4.4.0.0.1.5 Comparison with PSC-associated taxa
psc_ald_sig_colon <- uni_df %>%
dplyr::filter(final_sig==TRUE) %>%
dplyr::select(SeqID,Taxonomy,log2FoldChange)
psc_genus_colon <- psc_genus_colonlist_core <- list()
list_core[["PSC vs ALD"]] <- psc_ald_sig_colon$Taxonomy
list_core[["PSC-associated taxa"]] <- psc_genus_colon$Taxonomy
# venn diagram
venn <- ggvenn(list_core, fill_color = c("blue", "red")) +
ggtitle("Intersection between PSC-associated taxa and (PSC vs ALD)") +
guides(fill = "none")
vennintersection_df <- intersect(list_core$`PSC vs ALD`,list_core$`PSC-associated taxa`)
intersection_df[1] "k__Bacteria;p__Firmicutes;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__Colidextribacter"
[2] "k__Bacteria;p__Firmicutes;c__Negativicutes;o__Veillonellales-Selenomonadales;f__Veillonellaceae;g__Dialister"
[3] "k__Bacteria;p__Firmicutes;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Lachnoclostridium"
4.4.0.0.2 ALD vs HC
4.4.0.0.2.1 linDA
group <- c("HC","ALD post_ltx")
comparison_name <- paste0(group[1], " vs ",group[2])# prepare the data
linda_data <- binomial_prep(colon_genus_tab,
colon_genus_taxa_tab,
colon_metadata,
group, usage="linDA")Removing 100 ASV(s)
filt_colon_uni_data <- linda_data[[1]]
filt_colon_uni_taxa <- linda_data[[2]]
filt_colon_uni_metadata <- linda_data[[3]]
# fit the model
linda.obj <- linda(filt_colon_uni_data,
filt_colon_uni_metadata,
formula = '~ Group + (1|Patient)')0 features are filtered!
The filtered data has 187 samples and 186 features will be tested!
Imputation approach is used.
Fit linear mixed effects models ...
Completed.
linda.output <- linda.obj$output
linda.output <- linda_renaming(linda.output, group)
# save the results
group1 <- paste0(group[1], " vs ","Group",group[2])
for (grp in c(group1)){
raw_linda_results_genus[[segment]][[grp]] <-
rawlinda.df(linda.output,
grp,
filt_colon_uni_data,
filt_colon_uni_taxa)
linda_results_genus[[segment]][[grp]] <-
linda.df(linda.output,
grp,
filt_colon_uni_data,
filt_colon_uni_taxa)
}# volcano plot
volcano <- volcano_plot_linda(linda.output, group1,
taxa_table = filt_colon_uni_taxa) +
ggtitle(paste(group,collapse=" vs "))
# see the plot
volcano4.4.0.0.2.2 MaAsLin2
volcano <- volcano_plot_maaslin(fit_data,filt_colon_uni_taxa) +
ggtitle(paste(group[1], "vs", group[2]))
volcano4.4.0.0.2.3 Group - Intersection
intersection_results <- group_intersection(group, list_intersections, list_venns,
linda.output, fit_data,
raw_linda_results_genus,
segment = segment,
level=level)
list_intersections <- intersection_results[[1]]
list_venns <- intersection_results[[2]]
venn <- intersection_results[[3]]
# show the results
venn4.4.0.0.2.4 Basic statistics
uni_df <- merge(basic_univariate_statistics(linda_data,group),
raw_linda_results_genus[[segment]][[group1]],
by="SeqID",all=TRUE)[1] "HC"
[1] "ALD post_ltx"
[1] "HC"
[1] "ALD post_ltx"
uni_df[["final_sig"]] <- uni_df$SeqID %in% list_intersections[[paste(segment,level,comparison_name)]][["SeqID"]]
uni_statistics[[segment]][[paste(level,comparison_name)]] <- uni_df
# for comparison
new_name <- comparison_name
addWorksheet(wb, sheetName = new_name)
writeData(wb, sheet = new_name, uni_df, rowNames=FALSE)4.4.0.0.2.5 Comparison with PSC-associated taxa
hc_ald_sig_colon <- uni_df %>%
dplyr::filter(final_sig==TRUE) %>%
dplyr::select(SeqID,Taxonomy,log2FoldChange)
psc_genus_colon <- psc_genus_colonlist_core <- list()
list_core[["ALD vs HC"]] <- hc_ald_sig_colon$Taxonomy
list_core[["PSC-associated taxa"]] <- psc_genus_colon$Taxonomy
# venn diagram
venn <- ggvenn(list_core, fill_color = c("blue", "red")) +
ggtitle("Intersection between PSC-associated taxa and (ALD vs HC)") +
guides(fill = "none")
vennFrom this Venn diagram, we are interested in EXLUSIVE RIGH JOIN
A <- list_core$`ALD vs HC`
B <- list_core$`PSC-associated taxa`
remaining_psc <- B[!(B %in% A)]
psc_genus_colon %>%
dplyr::filter(Taxonomy %in% remaining_psc) SeqID
1 Rothia
2 Parabacteroides
3 Fusicatenibacter
4 Lachnoclostridium
5 Colidextribacter
6 Dialister
7 Klebsiella
Taxonomy
1 k__Bacteria;p__Actinobacteriota;c__Actinobacteria;o__Micrococcales;f__Micrococcaceae;g__Rothia
2 k__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Tannerellaceae;g__Parabacteroides
3 k__Bacteria;p__Firmicutes;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Fusicatenibacter
4 k__Bacteria;p__Firmicutes;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Lachnoclostridium
5 k__Bacteria;p__Firmicutes;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__Colidextribacter
6 k__Bacteria;p__Firmicutes;c__Negativicutes;o__Veillonellales-Selenomonadales;f__Veillonellaceae;g__Dialister
7 k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Enterobacteriaceae;g__Klebsiella
log2FoldChange p_value padj
1 1.842842 1.452081e-05 1.953709e-04
2 -3.568149 2.950689e-06 6.238599e-05
3 -1.917547 1.097200e-02 3.455012e-02
4 -1.736481 1.812625e-03 8.383390e-03
5 -1.624469 8.895010e-03 2.991958e-02
6 2.144460 1.055678e-02 3.396528e-02
7 2.288824 1.471023e-03 7.257045e-03
uni_df %>%
dplyr::filter(Taxonomy %in% remaining_psc) SeqID Q1_clr HC Q1_clr ALD post_ltx Q1_ra HC
1 Colidextribacter 1.386414 1.3243781 0.0006398704
2 Dialister -2.055467 -1.8464644 0.0000000000
3 Fusicatenibacter 2.627958 -0.9536309 0.0024423570
4 Klebsiella -2.173923 -2.1627708 0.0000000000
5 Lachnoclostridium 4.526164 4.3276317 0.0163044634
6 Parabacteroides 3.207395 1.7713357 0.0044359835
7 Rothia -2.019276 -1.6659988 0.0000000000
Q1_ra ALD post_ltx MEDIAN_clr HC MEDIAN_clr ALD post_ltx MEDIAN_clr_ALL
1 0.0005364459 2.1904872 2.6123898 2.4197662
2 0.0000000000 -1.6466225 -1.5459890 -1.5933847
3 0.0000000000 3.7263000 3.4597865 3.6125117
4 0.0000000000 -1.9014687 -1.7576714 -1.8353611
5 0.0098512164 5.0769310 4.9156184 5.0561821
6 0.0005008488 3.8462676 3.5515723 3.7260301
7 0.0000000000 -0.9038266 0.1388787 -0.2249714
MEDIAN_ra HC MEDIAN_ra ALD post_ltx MEDIAN_ra_ALL Q3_clr HC
1 1.624514e-03 0.0018135471 0.0017926884 3.2727209
2 0.000000e+00 0.0000000000 0.0000000000 0.8918002
3 6.821282e-03 0.0042333567 0.0059990771 4.3400312
4 0.000000e+00 0.0000000000 0.0000000000 -1.5486429
5 2.914977e-02 0.0193969639 0.0225328829 5.7097862
6 7.486673e-03 0.0054098774 0.0067624683 4.3896860
7 7.396176e-05 0.0001571464 0.0000896218 0.3132439
Q3_clr ALD post_ltx Q3_ra HC Q3_ra ALD post_ltx MEAN_clr HC
1 3.346080 0.0038319946 0.0036286551 2.1263747
2 1.130148 0.0003777954 0.0003012978 -0.7515134
3 4.601093 0.0127660808 0.0130167982 3.4889730
4 -1.279010 0.0000000000 0.0000000000 -1.5998927
5 5.660613 0.0444031574 0.0311998481 5.1168178
6 4.456647 0.0122222926 0.0108266661 3.4580071
7 1.708148 0.0002178330 0.0006215708 -0.6688172
MEAN_clr ALD post_ltx MEAN_ra HC MEAN_ra ALD post_ltx Cliffs_delta_clr
1 2.07987983 0.0042530750 0.0026577988 0.05491991
2 -0.30065932 0.0006040873 0.0021484826 0.14187643
3 2.57899858 0.0109515100 0.0103134190 -0.10068650
4 -1.17216554 0.0008302963 0.0019654497 0.15125858
5 5.00529390 0.0390427752 0.0270145037 -0.08489703
6 2.84063312 0.0124980591 0.0096894309 -0.10549199
7 0.09788766 0.0003256949 0.0005838665 0.24965675
Cliffs_delta_ra LFC_clr LFC_ra PREVALENCE_ HC
1 -0.05995423 -0.04649491 -0.67827442 0.88421053
2 -0.44462243 0.45085405 1.83048904 0.28421053
3 -0.15240275 -0.90997442 -0.08660713 0.97894737
4 -0.74691076 0.42772714 1.24316131 0.06315789
5 -0.24942792 -0.11152395 -0.53132143 1.00000000
6 -0.20389016 -0.61737399 -0.36722022 0.91578947
7 -0.01876430 0.76670482 0.84211724 0.50526316
PREVALENCE_ ALD post_ltx
1 0.8369565
2 0.3043478
3 0.7391304
4 0.1304348
5 1.0000000
6 0.8260870
7 0.5760870
Taxonomy
1 k__Bacteria;p__Firmicutes;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__Colidextribacter
2 k__Bacteria;p__Firmicutes;c__Negativicutes;o__Veillonellales-Selenomonadales;f__Veillonellaceae;g__Dialister
3 k__Bacteria;p__Firmicutes;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Fusicatenibacter
4 k__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Enterobacteriaceae;g__Klebsiella
5 k__Bacteria;p__Firmicutes;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Lachnoclostridium
6 k__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Tannerellaceae;g__Parabacteroides
7 k__Bacteria;p__Actinobacteriota;c__Actinobacteria;o__Micrococcales;f__Micrococcaceae;g__Rothia
log2FoldChange p_value padj final_sig
1 -0.3104290 0.52649237 0.64677677 FALSE
2 0.5034364 0.40186250 0.53774406 FALSE
3 -1.3714286 0.02440421 0.05895042 FALSE
4 0.3653255 0.39248230 0.53238987 FALSE
5 -0.1464704 0.63786636 0.71471773 FALSE
6 -1.0640435 0.05019802 0.11249195 FALSE
7 0.9879995 0.01331926 0.03643209 FALSE
4.4.0.0.3 Visualization
Heatmap visualizing the linDA’s logFoldChange for taxa with p < 0.1.
list_heatmap <- list_intersections[grep(paste(segment,level),
names(list_intersections),value=TRUE)]
p_heatmap_linda <- heatmap_linda(list_heatmap,colon_taxa_tab)
p_heatmap_lindaDot heatmap
dotheatmap_linda <- dot_heatmap_linda(list_heatmap,
uni_statistics$colon[grepl(level,names(uni_statistics$colon))],
colon_taxa_tab) + xlab("") + ylab("")min_clr -1.8479
max_clr 6.028454
min_log -3.176554
max_log 2.795394
dotheatmap_lindaHorizontal bar plot
p_prevalence <- horizontal_barplot(wb,taxa=levels(dotheatmap_linda$data$SeqID))Using SeqID as id variables
p_prevalence_final <- ggarrange(p_prevalence,
ggplot() + theme_minimal(),
nrow = 2,heights = c(1,0.115))
p <- ggarrange(dotheatmap_linda + theme(legend.position = "none"),p_prevalence_final,ncol=2,widths = c(1,0.3))
dot_heatmap_colon <- p
dot_heatmap_colon5 Paper-ready visualizations
Alpha diversity
p_A <- alpha_div_plots$`terminal_ileum Country` +
ggtitle("Terminal ileum")+
theme(plot.title = element_text(hjust=0.5,face = "bold",size = 15),
legend.position = "none")
p_B <- alpha_div_plots$`colon Country` +
ggtitle("Colon") +
theme(plot.title = element_text(hjust=0.5,face = "bold",size = 15),
legend.position = "none")
QALD_alpha <- ggarrange(p_A, ggplot() + theme_minimal(),
p_B,
nrow=1, ncol=3,widths = c(1,0.1,1))
QALD_alphaBeta diversity
pca_ti <- pca_plots_list$`terminal_ileum genus custom` +
ggtitle("Terminal ileum")+
theme(plot.title = element_text(hjust=0.5,face = "bold",size = 15),
legend.position = "none")
pca_colon <- pca_plots_list$`colon genus custom` +
ggtitle("Colon")+
theme(plot.title = element_text(hjust=0.5,face = "bold",size = 15),
legend.position = "none")
genus_QALD_beta <- ggarrange(pca_ti,
ggplot() + theme_minimal(),
pca_colon,
ncol=3,
widths = c(1,0.1,1))
genus_QALD_betaAlpha + Beta diversity
alpha_beta <- ggarrange(QALD_alpha,genus_QALD_beta,
ncol = 1,nrow=2,labels = c("A","B"))
alpha_beta5.1 Supplement
genus_QALD_beta <- ggarrange(genus_QALD_beta,ggplot() + theme_minimal(),ncol=2,
widths = c(1,0.15))
genus_QALD_beta6 Session info
sessionInfo()R version 4.3.1 (2023-06-16 ucrt)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 11 x64 (build 26100)
Matrix products: default
locale:
[1] LC_COLLATE=Czech_Czechia.utf8 LC_CTYPE=Czech_Czechia.utf8
[3] LC_MONETARY=Czech_Czechia.utf8 LC_NUMERIC=C
[5] LC_TIME=Czech_Czechia.utf8
time zone: Europe/Prague
tzcode source: internal
attached base packages:
[1] parallel stats graphics grDevices utils datasets methods
[8] base
other attached packages:
[1] mice_3.17.0 picante_1.8.2 ape_5.8-1
[4] lubridate_1.9.3 forcats_1.0.0 stringr_1.5.1
[7] tidyverse_2.0.0 kableExtra_1.4.0 tidyr_1.3.1
[10] gbm_2.2.2 doParallel_1.0.17 iterators_1.0.14
[13] foreach_1.5.2 ranger_0.17.0 ggvenn_0.1.15
[16] Maaslin2_1.16.0 purrr_1.0.2 pROC_1.18.5
[19] glmnet_4.1-8 MicrobiomeStat_1.2 caret_6.0-94
[22] openxlsx_4.2.8 magrittr_2.0.3 emmeans_1.11.2-8
[25] lmerTest_3.1-3 robustlmm_3.3-1 lme4_1.1-37
[28] Matrix_1.6-5 mgcv_1.9-1 nlme_3.1-164
[31] pheatmap_1.0.13 reshape2_1.4.4 vegan_2.6-10
[34] lattice_0.22-6 permute_0.9-8 ggplotify_0.1.2
[37] ggrepel_0.9.6 ggpubr_0.6.1 MicrobiotaProcess_1.14.1
[40] phyloseq_1.46.0 ggplot2_3.5.2 tibble_3.2.1
[43] dplyr_1.1.4 cowplot_1.2.0 readr_2.1.5
[46] igraph_2.1.4 ccrepe_1.38.1 data.table_1.15.4
loaded via a namespace (and not attached):
[1] fs_1.6.5 matrixStats_1.5.0
[3] bitops_1.0-9 RColorBrewer_1.1-3
[5] numDeriv_2016.8-1.1 tools_4.3.1
[7] backports_1.4.1 R6_2.6.1
[9] lazyeval_0.2.2 jomo_2.7-6
[11] rhdf5filters_1.14.1 withr_3.0.2
[13] gridExtra_2.3 cli_3.6.2
[15] Biobase_2.62.0 logging_0.10-108
[17] biglm_0.9-3 sandwich_3.1-1
[19] labeling_0.4.3 mvtnorm_1.3-3
[21] robustbase_0.99-4-1 pbapply_1.7-4
[23] systemfonts_1.2.2 yulab.utils_0.2.1
[25] svglite_2.1.3 parallelly_1.38.0
[27] rstudioapi_0.17.1 generics_0.1.4
[29] gridGraphics_0.5-1 shape_1.4.6.1
[31] car_3.1-3 zip_2.3.2
[33] biomformat_1.30.0 S4Vectors_0.40.2
[35] abind_1.4-8 infotheo_1.2.0.1
[37] lifecycle_1.0.4 multcomp_1.4-28
[39] yaml_2.3.10 carData_3.0-5
[41] SummarizedExperiment_1.32.0 Rtsne_0.17
[43] rhdf5_2.46.1 recipes_1.3.1
[45] SparseArray_1.2.4 grid_4.3.1
[47] mitml_0.4-5 crayon_1.5.3
[49] pillar_1.11.0 knitr_1.50
[51] optparse_1.7.5 GenomicRanges_1.54.1
[53] statip_0.2.3 boot_1.3-32
[55] estimability_1.5.1 future.apply_1.11.3
[57] codetools_0.2-20 pan_1.9
[59] glue_1.7.0 ggfun_0.2.0
[61] vctrs_0.6.5 treeio_1.26.0
[63] Rdpack_2.6.4 gtable_0.3.6
[65] maditr_0.8.6 gower_1.0.1
[67] xfun_0.52 rbibutils_2.3
[69] S4Arrays_1.2.1 prodlim_2024.06.25
[71] libcoin_1.0-10 coda_0.19-4.1
[73] reformulas_0.4.1 pcaPP_2.0-5
[75] modeest_2.4.0 survival_3.5-8
[77] timeDate_4041.110 hardhat_1.4.2
[79] lava_1.8.1 statmod_1.5.0
[81] TH.data_1.1-4 ipred_0.9-15
[83] ggtree_3.10.1 GenomeInfoDb_1.38.8
[85] fBasics_4041.97 rpart_4.1.23
[87] colorspace_2.1-1 BiocGenerics_0.48.1
[89] DBI_1.2.3 nnet_7.3-19
[91] ade4_1.7-23 tidyselect_1.2.1
[93] timeSeries_4041.111 compiler_4.3.1
[95] microbiome_1.24.0 xml2_1.3.8
[97] DelayedArray_0.28.0 scales_1.4.0
[99] DEoptimR_1.1-4 spatial_7.3-18
[101] rappdirs_0.3.3 digest_0.6.35
[103] minqa_1.2.8 rmarkdown_2.29
[105] XVector_0.42.0 htmltools_0.5.8.1
[107] pkgconfig_2.0.3 MatrixGenerics_1.14.0
[109] stabledist_0.7-2 fastmap_1.2.0
[111] rlang_1.1.3 htmlwidgets_1.6.4
[113] farver_2.1.2 zoo_1.8-13
[115] jsonlite_2.0.0 ModelMetrics_1.2.2.2
[117] RCurl_1.98-1.17 modeltools_0.2-24
[119] Formula_1.2-5 GenomeInfoDbData_1.2.11
[121] patchwork_1.3.2 Rhdf5lib_1.24.2
[123] Rcpp_1.0.12 ggnewscale_0.5.2
[125] fastGHQuad_1.0.1 stringi_1.8.3
[127] ggstar_1.0.4 stable_1.1.6
[129] zlibbioc_1.48.2 MASS_7.3-60.0.1
[131] plyr_1.8.9 listenv_0.9.1
[133] Biostrings_2.70.3 splines_4.3.1
[135] hash_2.2.6.3 multtest_2.58.0
[137] hms_1.1.3 ggtreeExtra_1.12.0
[139] ggsignif_0.6.4 stats4_4.3.1
[141] rmutil_1.1.10 evaluate_1.0.5
[143] nloptr_2.2.1 tzdb_0.5.0
[145] getopt_1.20.4 future_1.34.0
[147] clue_0.3-66 coin_1.4-3
[149] broom_1.0.9 xtable_1.8-4
[151] tidytree_0.4.6 rstatix_0.7.2
[153] viridisLite_0.4.2 class_7.3-22
[155] aplot_0.2.8 IRanges_2.36.0
[157] cluster_2.1.8.1 timechange_0.3.0
[159] globals_0.18.0