Geography-independent mucosal microbiota alterations in primary sclerosing cholangitis persist after liver transplantation

5. Hypothesis: PSC disease is associated with fecal calprotectin

1 Introduction

1.1 About

An IBD vs. no-IBD comparison was performed within PSC patients (pre-LTx and rPSC individuals combined) to discover IBD-specific pattern.

IBD vs no_IBD analysis on merged data

  • Alpha diversity –> group effect, country effect, interaction effect

  • Beta diversity – PERMANOVA, PCA -> group effect, country effect, interaction effect

  • DAA ->

    • o Group effect – linDA + MaAsLin2 intersection

    • o Country effect – – taxa with significant interaction effect were excluded based on individual post-hoc analysis. Taxa had to have the same direction in both countries

IBD vs no_IBD – finding IBD-specific pattern in PSC patients

Importing libraries and custom functions built for this analysis

source("custom_functions.R")

1.2 Data Import

Importing ASV, taxa and metadata tables for both Czech and Norway samples.

Czech

MICROBIOME

path = "../../../../data/analysis_ready_data/ikem/"
asv_tab_ikem <- as.data.frame(fread(file.path(path,"asv_table_ikem.csv"),
                                    check.names = FALSE))
taxa_tab_ikem <- as.data.frame(fread(file.path(path,"taxa_table_ikem.csv"),
                                     check.names = FALSE))
metadata_ikem <- as.data.frame(fread(file.path(path,"metadata_ikem.csv"),
                                     check.names = FALSE))

Fecal calprotectin info

# clinical metadata
metadata_clinical <- read.csv("../../../../data/clinical_data/clinical_metadata_cz.csv")
metadata_clinical$PatientID <- as.character(metadata_clinical$PatientID)

Norway

MICROBIOME

path = "../../../../data/analysis_ready_data/norway/"
asv_tab_norway <- as.data.frame(fread(file.path(path,"asv_table_norway.csv"),
                                    check.names = FALSE))
taxa_tab_norway <- as.data.frame(fread(file.path(path,"taxa_table_norway.csv"),
                                    check.names = FALSE))
metadata_norway <- as.data.frame(fread(file.path(path,"metadata_norway.csv"),
                                    check.names = FALSE))

Fecal calprotectin info

# clinical metadata
metadata_clinical_no <- read.csv("../../../../data/clinical_data/clinical_metadata_no.csv")
metadata_clinical_no$PatientID <- paste0("NO_", as.character(metadata_clinical_no$subjectid))

1.2.1 Merging

Clinical data

metadata_ikem <- merge(metadata_ikem,metadata_clinical[,c("SampleID","Fecal.calprotectin")],by="SampleID",all.x=TRUE) %>%
  dplyr::mutate(Calprotectin=Fecal.calprotectin) %>% 
  dplyr::mutate(Calprotectin = case_when(
        Calprotectin == ">6000" ~ "6000",
        TRUE ~ Calprotectin)) %>%
  dplyr::mutate(Calprotectin= as.numeric(Calprotectin))

metadata_norway <- merge(metadata_norway,metadata_clinical_no[,c("SampleID","Calprotectin")],
                         by="SampleID",all.x=TRUE) %>%
  dplyr::mutate(Calprotectin= as.numeric(Calprotectin))

Terminal ileum

ileum_data <- merging_data(asv_tab_1=asv_tab_ikem,
                           asv_tab_2= asv_tab_norway,
                           taxa_tab_1=taxa_tab_ikem,
                           taxa_tab_2=taxa_tab_norway,
                           metadata_1=metadata_ikem,
                           metadata_2=metadata_norway,
                           segment="TI",Q="Q5")
Removing 1498 ASV(s)
Removing 1834 ASV(s)
Merging at ASV level
Finding inconsistencies in taxonomy, trying to keep the ones that have better taxonomy assignment
Removing 2296 ASV(s)
ileum_asv_tab <- ileum_data[[1]]
ileum_taxa_tab <- ileum_data[[2]]
ileum_metadata <- ileum_data[[3]]
ileum_metadata %<>%  dplyr::mutate(
  log_Calprotectin=log(Calprotectin,base = 10))

median(ileum_metadata$Calprotectin,na.rm = TRUE)
[1] 84.5
median(ileum_metadata$log_Calprotectin,na.rm = TRUE)
[1] 1.926667
p1 <- ggplot(ileum_metadata,aes(log_Calprotectin)) + 
  geom_histogram() + 
  geom_vline(xintercept = log(250,base=10), linetype="dashed", 
               color = "red")

Splitting to groups

ileum_metadata %<>% dplyr::mutate(Calprotectin_groups = case_when(
    Calprotectin <= 250 ~ "low",
    Calprotectin > 250 ~ "high"))

ileum_metadata %>% group_by(Calprotectin_groups) %>%
  count()
# A tibble: 3 × 2
# Groups:   Calprotectin_groups [3]
  Calprotectin_groups     n
  <chr>               <int>
1 high                   32
2 low                    62
3 <NA>                   14
p2 <- ggplot(ileum_metadata %>% dplyr::filter(!is.na(Calprotectin_groups)),aes(Calprotectin_groups)) + 
  geom_bar()
p <- ggarrange(p1,p2,ncol = 2)
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Warning: Removed 14 rows containing non-finite outside the scale range
(`stat_bin()`).
p

ileum_metadata %<>% drop_na(Calprotectin) %>% dplyr::filter(Group!="healthy")
ileum_asv_tab %<>% dplyr::select("SeqID",ileum_metadata$SampleID)
ileum_taxa_tab %<>% dplyr::filter(SeqID %in% ileum_asv_tab$SeqID)

Colon

colon_data <- merging_data(asv_tab_1=asv_tab_ikem,
                           asv_tab_2=asv_tab_norway,
                           taxa_tab_1=taxa_tab_ikem,
                           taxa_tab_2=taxa_tab_norway,
                           metadata_1=metadata_ikem,
                           metadata_2=metadata_norway,
                           segment="colon",Q="Q5")
Removing 739 ASV(s)
Removing 266 ASV(s)
Merging at ASV level
Finding inconsistencies in taxonomy, trying to keep the ones that have better taxonomy assignment
Removing 2852 ASV(s)
colon_asv_tab <- colon_data[[1]]
colon_taxa_tab <- colon_data[[2]]
colon_metadata <- colon_data[[3]]
colon_metadata %<>%  dplyr::mutate(
  log_Calprotectin=log(Calprotectin,base = 10)) 

colon_metadata_forplot <- colon_metadata %>%
        group_by(Patient) %>%
        distinct(Patient, .keep_all = TRUE) %>%
        as.data.frame() 

median(colon_metadata_forplot$Calprotectin,na.rm = TRUE)
[1] 79
median(colon_metadata_forplot$log_Calprotectin,na.rm = TRUE)
[1] 1.897488
p1 <- ggplot(colon_metadata_forplot,aes(log_Calprotectin)) + 
  geom_histogram() + 
  geom_vline(xintercept = log(250,base=10), linetype="dashed", 
               color = "red")

Splitting to groups

colon_metadata %<>% dplyr::mutate(Calprotectin_groups = case_when(
    Calprotectin <= 250 ~ "low",
    Calprotectin > 250 ~ "high"))

colon_metadata_forplot %<>% dplyr::mutate(Calprotectin_groups = case_when(
    Calprotectin <= 250 ~ "low",
    Calprotectin > 250 ~ "high"))

colon_metadata_forplot %>% group_by(Calprotectin_groups) %>%
  count()
# A tibble: 3 × 2
# Groups:   Calprotectin_groups [3]
  Calprotectin_groups     n
  <chr>               <int>
1 high                   37
2 low                    87
3 <NA>                   22
p2 <- ggplot(colon_metadata_forplot %>% dplyr::filter(!is.na(Calprotectin_groups)),aes(Calprotectin_groups)) + 
  geom_bar()
p <- ggarrange(p1,p2,ncol = 2)
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Warning: Removed 22 rows containing non-finite outside the scale range
(`stat_bin()`).
p

colon_metadata %<>% drop_na(Calprotectin) %>% dplyr::filter(Group!="healthy")

colon_asv_tab %<>% dplyr::select("SeqID",colon_metadata$SampleID)
colon_taxa_tab %<>% dplyr::filter(SeqID %in% colon_asv_tab$SeqID)

1.3 CHI SQUARE TEST

1.3.1 pre_LTx, rPSC, non-rPSC

metadata <- rbind(ileum_metadata,colon_metadata) %>% 
        group_by(Patient) %>%
        distinct(Patient, .keep_all = TRUE) %>%
        as.data.frame() 
df <- metadata %>% dplyr::group_by(Calprotectin_groups,Group) %>%
  distinct(Patient, .keep_all = TRUE) %>%
  count() %>% drop_na() 

# Summarize by Nancy and Group
data <- df %>%
  group_by(Calprotectin_groups, Group) %>%
  summarise(n = sum(n), .groups = "drop") %>%
  pivot_wider(names_from = Group, values_from = n) %>%
  column_to_rownames("Calprotectin_groups") %>%
  as.matrix()

chi_res <- chisq.test(data)
data
     pre_ltx rPSC
high      25   13
low       76   17
chi_res$expected
      pre_ltx     rPSC
high 29.29771  8.70229
low  71.70229 21.29771
chi_res

    Pearson's Chi-squared test with Yates' continuity correction

data:  data
X-squared = 3.0279, df = 1, p-value = 0.08184

2 Data Analysis - Terminal ileum

segment="terminal_ileum"

2.1 Filtering

Rules:

  • sequencing depth > 10000

  • nearZeroVar() with default settings

Rarefaction Curve

path="../../../intermediate_files/rarecurves"
seq_depth_threshold <- 10000
ps <- construct_phyloseq(ileum_asv_tab,ileum_taxa_tab,ileum_metadata)
rareres <- get_rarecurve(obj=ps, chunks=500)
save(rareres,file = file.path(path,"rarefaction_ileum.Rdata"))
load(file.path(path,"rarefaction_ileum.Rdata"))

prare <- ggrarecurve(obj=rareres,
                      factorNames="Country",
                      indexNames=c("Observe")) + 
  theme_bw() +
  theme(axis.text=element_text(size=8), panel.grid=element_blank(),
        strip.background = element_rect(colour=NA,fill="grey"),
        strip.text.x = element_text(face="bold")) + 
  geom_vline(xintercept = seq_depth_threshold, 
             linetype="dashed", 
             color = "red") + 
  xlim(0, 20000)
The color has been set automatically, you can reset it manually by adding scale_color_manual(values=yourcolors)
prare

Library size

read_counts(ileum_asv_tab, line = c(5000,10000))
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.

2.1.1 Sequencing depth

data_filt <- seq_depth_filtering(ileum_asv_tab,
                                 ileum_taxa_tab,
                                 ileum_metadata,
                                 seq_depth_threshold = 10000)
Removing 238 ASV(s)
filt_ileum_asv_tab <- data_filt[[1]]; alpha_ileum_asv_tab <- filt_ileum_asv_tab
filt_ileum_taxa_tab <- data_filt[[2]]; alpha_ileum_taxa_tab <- filt_ileum_taxa_tab
filt_ileum_metadata <- data_filt[[3]]; alpha_ileum_metadata <- filt_ileum_metadata

seq_step <- dim(filt_ileum_asv_tab)[1]

Library size

read_counts(filt_ileum_asv_tab,line = c(5000,10000))
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.

2.1.2 NearZeroVar

data_filt <- nearzerovar_filtering(filt_ileum_asv_tab, 
                                   filt_ileum_taxa_tab,
                                   filt_ileum_metadata)

filt_ileum_asv_tab <- data_filt[[1]]
filt_ileum_taxa_tab <- data_filt[[2]]
nearzero_step <- dim(filt_ileum_asv_tab)[1]

Library size

read_counts(filt_ileum_asv_tab,line = c(5000,10000))
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.

2.1.3 Final Counts

final_counts_filtering(ileum_asv_tab,
                       filt_ileum_asv_tab,
                       filt_ileum_metadata,
                       seq_step, 0, nearzero_step) %>% `colnames<-`("Count")
                            Count
Raw data: ASVs               2836
Raw data: Samples              94
Sequencing depth filt: ASVs  2598
Prevalence filt: ASVs           0
NearZeroVar filt: ASVs        627
Filt data: ASVs               627
Filt data: Samples             90
Filt data: Patients            90
Filt data: Patients.1           0
Filtered samples                4
Matrices                       TI
healthy                         0
non-rPSC                        0
rPSC                           26
pre_ltx                        64
post_ltx                        0
ETOH                            0

2.2 Alpha diversity

path = "../../../results/merged_sites/main_analysis/Q5/alpha_diversity"

Calculation

# Construct MPSE object
alpha_ileum_metadata$Sample <- alpha_ileum_metadata$SampleID
ileum_mpse <- as.MPSE(construct_phyloseq(alpha_ileum_asv_tab,
                                         alpha_ileum_taxa_tab,
                                         alpha_ileum_metadata))

ileum_mpse %<>% mp_rrarefy(raresize = 10000,seed = 123)

# Calculate alpha diversity - rarefied counts
ileum_mpse %<>% mp_cal_alpha(.abundance=RareAbundance, force=TRUE)
alpha_div_plots <- list()

# preparing data frame
alpha_data <- data.frame(SampleID=ileum_mpse$Sample.x,
                         Observe=ileum_mpse$Observe,
                         Shannon=ileum_mpse$Shannon,
                         Simpson=ileum_mpse$Simpson,
                         Pielou=ileum_mpse$Pielou,
                         Group=ileum_mpse$Calprotectin_groups,
                         Country=ileum_mpse$Country,
                         Patient=ileum_mpse$Patient)

write.csv(alpha_data,file.path(path,paste0("alpha_indices_",segment,".csv")),
          row.names = FALSE)

alpha_data %<>% drop_na()

2.2.1 Plots

p_boxplot_alpha <- alpha_diversity_countries(alpha_data,
                                             show_legend = FALSE,
                                             size=1,gap=FALSE)
Using SampleID, Group, Country, Patient as id variables
Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
ℹ Please use the `linewidth` argument instead.
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
Using SampleID, Group, Country, Patient as id variables
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
# save the results
alpha_div_plots[[paste(segment,"Country")]] <- p_boxplot_alpha

# see the results
p_boxplot_alpha

2.2.2 Linear Model

path = "../../../results/merged_sites/main_analysis/Q5/alpha_diversity"
alpha_data <- read.csv(file.path(path,paste0("alpha_indices_",segment,".csv")))

Richness

results_model <- pairwise.lm(formula = "Observe ~ Group * Country",
                             factors=alpha_data$Group,
                             data=alpha_data)

# check interaction
if (!is.data.frame(results_model)){
  results_model_observe <- results_model[[1]]
  results_model_observe_emeans <- results_model[[2]]
} else {
  results_model_observe <- results_model
  results_model_observe_emeans <- NA
}

# save the results
pc_observed <- list(); 
pc_observed[[segment]] <- results_model_observe
# see the results
knitr::kable(results_model_observe,digits = 3,
caption = "Raw results of linear model of richness estimation.")
Raw results of linear model of richness estimation.
Estimate Std. Error t value Pr(>|t|)
low vs Grouphigh -9.672 16.717 -0.579 0.564
low , high - CZ vs NO 3.275 14.092 0.232 0.817
low vs Grouphigh:CountryNO -15.219 23.797 -0.640 0.524
knitr::kable(results_model_observe_emeans,digits = 3,
caption = "Raw results of independent country analysis")

Table: Raw results of independent country analysis

Shannon

results_model <- pairwise.lm(formula = "Shannon ~ Group * Country",
                             factors=alpha_data$Group,
                             data=alpha_data)

# check interaction
if (!is.data.frame(results_model)){
  results_model_shannon <- results_model[[1]]
  results_model_shannon_emeans <- results_model[[2]]
} else {
  results_model_shannon <- results_model
  results_model_shannon_emeans <- NA
}

# save the results
pc_shannon <- list(); 
pc_shannon[[segment]] <- as.data.frame(results_model_shannon)
# see the results
knitr::kable(results_model_shannon,digits = 3,
caption = "Raw results of linear model of Shannon estimation.")
Raw results of linear model of Shannon estimation.
Estimate Std. Error t value Pr(>|t|)
low vs Grouphigh -0.010 0.219 -0.044 0.965
low , high - CZ vs NO 0.030 0.185 0.164 0.870
low vs Grouphigh:CountryNO -0.323 0.312 -1.034 0.304
knitr::kable(results_model_shannon_emeans,digits = 3,
caption = "Raw results of independent country analysis")

Table: Raw results of independent country analysis

Simpson

results_model <- pairwise.lm(formula = "Simpson ~ Group * Country",
                                     factors=alpha_data$Group,
                                     data=alpha_data)

# check interaction
if (!is.data.frame(results_model)){
  results_model_simpson <- results_model[[1]]
  results_model_simpson_emeans <- results_model[[2]]
} else {
  results_model_simpson <- results_model
  results_model_simpson_emeans <- NA
}


# save the results
pc_simpson <- list(); 
pc_simpson[[segment]] <- as.data.frame(results_model_simpson)
# see the results
knitr::kable(results_model_simpson,digits = 3,
caption = "Raw results of linear model of Simpson estimation.")
Raw results of linear model of Simpson estimation.
Estimate Std. Error t value Pr(>|t|)
low vs Grouphigh 0.000 0.029 -0.001 0.999
low , high - CZ vs NO 0.001 0.024 0.055 0.956
low vs Grouphigh:CountryNO -0.034 0.041 -0.826 0.411
knitr::kable(results_model_simpson_emeans,digits = 3,
caption = "Raw results of independent country analysis")

Table: Raw results of independent country analysis

2.2.3 Saving results

alpha_list <- list(
  Richness=pc_observed[[segment]],
  Shannon=pc_shannon[[segment]])
                   
write.xlsx(alpha_list, 
           file = file.path(path,paste0("alpha_diversity_results_",segment,".xlsx")))

2.3 Beta diversity

Calculating Aitchison distance (euclidean distance on clr-transformed data), both at ASV and genus level.

2.3.1 Main analysis

Genus level, Aitchison distance

level="genus"
path = "../../../results/merged_sites/main_analysis/Q5/beta_diversity"
pairwise_aitchison_raw <- list()
pca_plots_list <- list()

Aggregation, filtering

# Aggregation
genus_data <- aggregate_taxa(ileum_asv_tab,
                             ileum_taxa_tab,
                             taxonomic_level=level,
                             names=TRUE)
# Filtration
filt_data <- filtering_steps(genus_data[[1]],
                             genus_data[[2]],
                             ileum_metadata,
                             seq_depth_threshold=10000)
Removing 17 ASV(s)
filt_ileum_genus_tab <- filt_data[[1]]
filt_ileum_genus_taxa <- filt_data[[2]]
filt_ileum_metadata <- filt_data[[3]] %>% dplyr::mutate(Group=Calprotectin_groups)
2.3.1.0.1 PERMANOVA
pairwise_df <- filt_ileum_genus_tab %>% column_to_rownames("SeqID") %>% t()

# main effect
pp_main <- pairwise.adonis(pairwise_df,
                           filt_ileum_metadata$Group,
                           covariate = filt_ileum_metadata$Country, sim.method = "robust.aitchison", p.adjust.m="BH")

# interaction
pp_int <- pairwise.adonis(pairwise_df,filt_ileum_metadata$Group,covariate = filt_ileum_metadata$Country, interaction = TRUE, sim.method = "robust.aitchison", p.adjust.m="BH")

# tidy the results
pp_factor <- pp_main[[1]]
pp_cov <- pp_main[[2]]
pp_fac.cov <- pp_int[[3]]

cols <- c("pairs","Df","SumsOfSqs", "F.Model","R2","p.value", "p.adjusted", "sig")
colnames(pp_factor) <- cols; colnames(pp_cov) <- cols; colnames(pp_fac.cov) <- cols; 

# save raw results
pairwise_aitchison_raw[[paste(level, segment)]] <- rbind(pp_factor,pp_cov,pp_fac.cov)
# see the results
knitr::kable(pp_factor,digits = 3,caption = "PERMANOVA, GROUP separation")
PERMANOVA, GROUP separation
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
low vs high 1 186.558 1.046 0.012 0.32 0.32
knitr::kable(pp_cov,digits = 3,caption = "PERMANOVA, COUNTRY separation")
PERMANOVA, COUNTRY separation
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
low vs high , Country 1 473.188 2.652 0.029 0.001 0.001 ***
knitr::kable(pp_fac.cov,digits = 3,caption = "PERMANOVA, INTERACTION GROUP:Country")
PERMANOVA, INTERACTION GROUP:Country
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
low vs high : Country 1 171.142 0.959 0.011 0.567 0.567

Interaction check

interaction_sig <- pp_fac.cov$pairs[pp_fac.cov$p.adjusted < 0.1]

if (length(interaction_sig)>0){
  for (i in 1:length(interaction_sig)){
  group1 <- unlist(strsplit(interaction_sig[i],split = " vs "))[1]
  group2 <- unlist(strsplit(interaction_sig[i],split = " vs "))[2]
  group2 <- unlist(strsplit(group2,split = " : "))[1]
  
  result_list <- adonis_postanalysis(x=pairwise_df,
                      factors = filt_ileum_metadata$v,
                      covariate = filt_ileum_metadata$Country, 
                      group1 = group1,
                      group2 = group2)
  print(result_list)
}
}
2.3.1.0.2 Plots

PCA

p <- pca_plot_custom(filt_ileum_genus_tab,
                                 filt_ileum_genus_taxa,
                                 filt_ileum_metadata,
                                 show_boxplots = TRUE,
                                 variable = "Group", size=3, show_legend=FALSE)

# save the results
pca_plots_list[[paste(segment,level,"custom")]] <- p

# see the results
p

2.3.1.1 Saving results

write.xlsx(pairwise_aitchison_raw[[paste(level, segment)]], 
           file = file.path(path,
           paste0("beta_diversity_results_", segment,".xlsx")))

2.4 Univariate Analysis

2.4.1 Main analysis

level="genus"
# needed paths
path = "../../../results/merged_sites/main_analysis/Q5/univariate_analysis"
path_maaslin=file.path("../../../intermediate_files/maaslin/Q5",level)
# variables
raw_linda_results_genus <- list();
raw_linda_results_genus[[segment]] <- list()
linda_results_genus <- list(); 
linda_results_genus[[segment]] <- list()

# country and interaction problems
list_intersections <- list()
list_venns <- list()
uni_statistics <- list()

# workbook for final df
wb <- createWorkbook()

Aggregate taxa

genus_data <- aggregate_taxa(ileum_asv_tab,
                             ileum_taxa_tab,
                             taxonomic_level = level)

ileum_genus_tab <- genus_data[[1]]
ileum_genus_taxa_tab <- genus_data[[2]]

ileum_genus_asv_taxa_tab <- create_asv_taxa_table(ileum_genus_tab,
                                                  ileum_genus_taxa_tab)

2.4.1.1 low vs high

group <- c("low","high")
comparison_name <- paste0(group[1], " vs ",group[2])
2.4.1.1.1 linDA
ileum_metadata %<>% dplyr::mutate(Group=Calprotectin_groups)

# prepare the data
linda_data <- binomial_prep(ileum_genus_tab,
                            ileum_genus_taxa_tab,
                            ileum_metadata,
                            group, usage="linDA")
Removing 11 ASV(s)
Removing 6 ASV(s)
filt_ileum_uni_data <- linda_data[[1]]
filt_ileum_uni_taxa <- linda_data[[2]]
filt_ileum_uni_metadata <- linda_data[[3]]

# fit the model
linda.obj <- linda(filt_ileum_uni_data, 
                   filt_ileum_uni_metadata, 
                   formula = '~ Group * Country')
0  features are filtered!
The filtered data has  90  samples and  202  features will be tested!
Imputation approach is used.
Fit linear models ...
Completed.
linda.output <- linda.obj$output
linda.output <- linda_renaming(linda.output, group)

# save the results
group1 <- paste0(group[1], " vs ","Group",group[2])
group2 <- paste0(group[1], " , ",group[2], " - CZ vs NO") 
group3 <- paste0(group[1], " vs ","Group",group[2], ":CountryNO")

for (grp in c(group1,group2,group3)){
  raw_linda_results_genus[[segment]][[grp]] <- 
    rawlinda.df(linda.output,
                grp,
                filt_ileum_uni_data,
                filt_ileum_uni_taxa)
  
  linda_results_genus[[segment]][[grp]] <- 
    linda.df(linda.output,
             grp,
             filt_ileum_uni_data,
             filt_ileum_uni_taxa)
}
# volcano plot
volcano_1 <- volcano_plot_linda(linda.output, 
                                group1, 
                                taxa_table = filt_ileum_uni_taxa) + 
            ggtitle(comparison_title)

volcano_2  <- volcano_plot_linda(linda.output, group2, 
                                taxa_table = filt_ileum_uni_taxa) + 
            ggtitle("NO vs CZ")

volcano_3 <- volcano_plot_linda(linda.output, group3, 
                                taxa_table = filt_ileum_uni_taxa) +
            ggtitle("Interaction effect")

volcano <- ggarrange(volcano_1,volcano_2,volcano_3, ncol=3)
volcano

2.4.1.1.2 MaAsLin2
volcano1 <- volcano_plot_maaslin(fit_data,filt_ileum_uni_taxa) + 
            ggtitle(comparison_title)

volcano2 <- volcano_plot_maaslin(fit_data,filt_ileum_uni_taxa,variable="Country") + 
            ggtitle("Country effect")

volcano <- ggarrange(volcano1,volcano2, ncol=2)
volcano

2.4.1.1.3 Group - Intersection
intersection_results <- group_intersection(group, list_intersections, list_venns,
                                           linda.output, fit_data,
                                           raw_linda_results_genus, 
                                           segment = segment,
                                           level=level)

list_intersections <- intersection_results[[1]]
list_venns <- intersection_results[[2]]
venn <- intersection_results[[3]]

# show the results
venn

2.4.1.1.4 Interaction effect
list_interaction_significant <- country_interaction(group,
                                                    linda.output, 
                                                    list_intersections,
                                          filt_ileum_uni_data,
                                          filt_ileum_uni_metadata,
                                          segment=segment,
                                          level=level)

# see the result
## significant interaction effect
list_interaction_significant[[1]]
NULL
## results for czech cohort
list_interaction_significant[[2]]
[1] NA
## results for norwegian cohort
list_interaction_significant[[3]]
[1] NA

Removing problematic taxa

list_intersections <- removing_interaction_problems(group,
                                                    list_interaction_significant,
                                                    list_intersections,
                                                    segment=segment,
                                                    level=level)
2.4.1.1.5 Basic statistics
uni_df <-  merge(basic_univariate_statistics(linda_data,group),
                 raw_linda_results_genus[[segment]][[group1]],
                 by="SeqID",all=TRUE)
[1] "low"
[1] "high"
[1] "low"
[1] "high"
uni_df[["final_sig"]] <- uni_df$SeqID %in% list_intersections[[paste(segment,level,comparison_name)]][["SeqID"]]
uni_statistics[[segment]][[paste(level,comparison_name)]] <- uni_df

# for comparison
new_name <- comparison_name
addWorksheet(wb, sheetName = new_name)
writeData(wb, sheet = new_name, uni_df, rowNames=FALSE)

2.4.1.2 Visualization

Heatmap visualizing the linDA’s logFoldChange for taxa with p < 0.1.

list_heatmap <- list_intersections[grep(paste(segment,level),
                                  names(list_intersections),value=TRUE)]

if (length((list_heatmap[[1]][[1]]))>1){
  p_heatmap_linda <- heatmap_linda(list_heatmap,ileum_taxa_tab)
  p_heatmap_linda
}

Dotheatmap

if (length((list_heatmap[[1]][[1]]))>0){
dotheatmap_linda <- dot_heatmap_linda(list_heatmap,
                                      uni_statistics$terminal_ileum[grepl(level,names(uni_statistics$terminal_ileum))],
                                      ileum_taxa_tab)
dotheatmap_linda

}

Horizontal bar plot

if (length((list_heatmap[[1]][[1]]))>0){
p_prevalence <- horizontal_barplot(wb,taxa=levels(dotheatmap_linda$data$SeqID))

p_prevalence_final <- ggarrange(p_prevalence,
                                ggplot() + theme_minimal(),
                                nrow = 2,heights = c(1,0.085))
p <- ggarrange(dotheatmap_linda + theme(legend.position = "none"),p_prevalence_final,ncol=2,widths = c(1,0.2))
p
}

2.4.1.3 Saving results

# ALL DATA
saveWorkbook(wb,file.path(path,paste0("uni_analysis_wb_",segment,".xlsx")),
             overwrite = TRUE)

# SIGNIFICANT taxa

write.xlsx(list_intersections[grepl(segment,names(list_intersections))] %>%
            `names<-`(gsub(segment, "", names(
              list_intersections[grepl(segment,names(list_intersections))]))),
           file.path(path,paste0("significant_taxa_",segment,".xlsx")))

2.5 Machine learning

path = "../../../results/merged_sites/main_analysis/Q5/models"

2.5.1 ElasticNet

model="enet"

2.5.1.1 ASV level

level="ASV"
2.5.1.1.1 low vs high
group <- c("low","high")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_asv_tab,
                                     ileum_taxa_tab,
                                     ileum_metadata,
                                     group, usage="ml_clr")
Removing 167 ASV(s)
Removing 71 ASV(s)
# fit the model
enet_model <- glmnet_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=TRUE,
                              file=model_name,
                              Q="Q5")

# ROC curve
roc_c <- roc_curve(enet_model, group)

# save the results
models_summ <- list()
models_cm <- list()
betas <- list()
roc_cs <- list()

models_summ[[model_name]] <- enet_model$model_summary
models_cm[[model_name]] <- enet_model$conf_matrices$original
roc_cs[[model_name]] <- enet_model$kfold_rocobjs
betas[[model_name]] <- as.matrix(enet_model$betas)

# see the results
enet_model$model_summary %>% t()
                                     [,1]
alpha                           0.0000000
lambda                          9.3347781
auc                             0.9694444
auc_czech                       0.9638889
auc_no                          0.9729167
auc_optimism_corrected          0.6255978
auc_optimism_corrected_CIL      0.4005091
auc_optimism_corrected_CIU      0.7861538
accuracy                        0.6777778
accuracy_czech                        NaN
accuracy_no                     0.7692308
accuracy_optimism_corrected     0.6413384
accuracy_optimism_corrected_CIL 0.4346841
accuracy_optimism_corrected_CIU 0.7214410
enet_model$conf_matrices
$original
    Predicted
True  0  1
   0  1 29
   1  0 60

$czech
    Predicted
True  0  1
   0  1 17
   1  0 20

$no
   1  
0 12 0
1 40 0
enet_model$plot
`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'

roc_c

2.5.1.2 Genus level

level="genus"

Aggregate taxa

genus_data <- aggregate_taxa(ileum_asv_tab,
                             ileum_taxa_tab,
                             taxonomic_level = level)

ileum_genus_tab <- genus_data[[1]]
ileum_genus_taxa_tab <- genus_data[[2]]
2.5.1.2.1 low vs high
group <- c("low","high")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_ileum_uni_data <- binomial_prep(ileum_genus_tab,
                                     ileum_genus_taxa_tab,
                                     ileum_metadata,
                                     group, 
                                     usage="ml_clr")
Removing 11 ASV(s)
Removing 6 ASV(s)
# fit the model
enet_model <- glmnet_binomial(filt_ileum_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=TRUE,
                              file=model_name,
                              Q="Q5")

# ROC
roc_c <- roc_curve(enet_model, group)

# save the results
models_summ[[model_name]] <- enet_model$model_summary
models_cm[[model_name]] <- enet_model$conf_matrices$original
roc_cs[[model_name]] <- enet_model$kfold_rocobjs
betas[[model_name]] <- as.matrix(enet_model$betas)

# see the results
enet_model$model_summary %>% t()
                                     [,1]
alpha                           0.0000000
lambda                          0.1511817
auc                             0.9420571
auc_czech                       0.8925971
auc_no                          0.9579125
auc_optimism_corrected          0.6199997
auc_optimism_corrected_CIL      0.5498124
auc_optimism_corrected_CIU      0.6872995
accuracy                        0.8465909
accuracy_czech                        NaN
accuracy_no                     0.9027356
accuracy_optimism_corrected     0.6540349
accuracy_optimism_corrected_CIL 0.5724537
accuracy_optimism_corrected_CIU 0.7100395
enet_model$conf_matrices
$original
    Predicted
True   0   1
   0  90  67
   1  14 357

$czech
    Predicted
True  0  1
   0 62 41
   1  8 88

$no
    Predicted
True   0   1
   0  28  26
   1   6 269
enet_model$plot
`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'

roc_c

2.5.1.3 Saving results

models_summ_df_ileum <- do.call(rbind, 
  models_summ[grep(segment,names(models_summ),value = TRUE)])

write.csv(models_summ_df_ileum,file.path(path,paste0("elastic_net_",segment,".csv")))

2.6 Results overview

2.6.0.1 Alpha diversity

pc_observed[[segment]]
                             Estimate Std. Error    t value  Pr(>|t|)
low vs Grouphigh            -9.672222   16.71735 -0.5785741 0.5643888
low , high - CZ vs NO        3.275000   14.09151  0.2324095 0.8167721
low vs Grouphigh:CountryNO -15.219444   23.79693 -0.6395550 0.5241619
pc_shannon[[segment]]
                               Estimate Std. Error    t value  Pr(>|t|)
low vs Grouphigh           -0.009581973  0.2194188 -0.0436698 0.9652689
low , high - CZ vs NO       0.030263347  0.1849541  0.1636263 0.8704093
low vs Grouphigh:CountryNO -0.322952862  0.3123398 -1.0339792 0.3040448
pc_simpson[[segment]]
                                Estimate Std. Error       t value  Pr(>|t|)
low vs Grouphigh           -2.636033e-05 0.02876313 -0.0009164625 0.9992709
low , high - CZ vs NO       1.331900e-03 0.02424523  0.0549345433 0.9563180
low vs Grouphigh:CountryNO -3.380300e-02 0.04094395 -0.8255921413 0.4113199

Plots

alpha_div_plots[[paste(segment,"Country")]]

2.6.0.2 Beta diversity

Main results

pairwise_aitchison_raw[[paste("genus", segment)]]
                  pairs Df SumsOfSqs   F.Model         R2 p.value p.adjusted
1           low vs high  1  186.5579 1.0457378 0.01150575   0.320      0.320
2 low vs high , Country  1  473.1877 2.6524217 0.02918333   0.001      0.001
3 low vs high : Country  1  171.1419 0.9588709 0.01055499   0.567      0.567
  sig
1    
2 ***
3    

PCA

pca_plots_list[[paste(segment,"genus custom")]]

2.6.0.3 Univariate analysis

Number of significant taxa

knitr::kable(as.data.frame(lapply(list_intersections,nrow)) %>% t() %>% 
  `colnames<-`("Count") %>% 
  `rownames<-`(c(names(list_intersections))),caption="Number of significant taxa")
Number of significant taxa
Count
terminal_ileum genus low vs high 0

2.6.0.4 Machine learning

Main models

knitr::kable(models_summ_df_ileum %>% dplyr::select(
"alpha","lambda",
"auc_optimism_corrected",
"auc_optimism_corrected_CIL",
"auc_optimism_corrected_CIU"),
             digits=2,caption="Elastic net results")
Elastic net results
alpha lambda auc_optimism_corrected auc_optimism_corrected_CIL auc_optimism_corrected_CIU
low vs high ASV terminal_ileum 0 9.33 0.63 0.40 0.79
low vs high genus terminal_ileum 0 0.15 0.62 0.55 0.69

ROC - Genus level

p <- roc_curve_all_custom(roc_cs[2],Q="Q5",
                     model_name="enet_model")

p

3 Data Analysis - Colon

segment="colon"

3.1 Filtering

Rules:

  • sequencing depth > 10000

  • nearZeroVar() with default settings

Rarefaction Curve

path="../../../intermediate_files/rarecurves"
seq_depth_threshold <- 10000
ps <- construct_phyloseq(colon_asv_tab,colon_taxa_tab,colon_metadata)
rareres <- get_rarecurve(obj=ps, chunks=500)
save(rareres,file = file.path(path,"rarefaction_colon.Rdata"))
load(file.path(path,"rarefaction_colon.Rdata"))

prare <- ggrarecurve(obj=rareres,
                      factorNames="Country",
                      indexNames=c("Observe")) + 
  theme_bw() +
  theme(axis.text=element_text(size=8), panel.grid=element_blank(),
        strip.background = element_rect(colour=NA,fill="grey"),
        strip.text.x = element_text(face="bold")) + 
  geom_vline(xintercept = seq_depth_threshold, 
             linetype="dashed", 
             color = "red") + 
  xlim(0, 20000)
The color has been set automatically, you can reset it manually by adding scale_color_manual(values=yourcolors)
prare

Library size

read_counts(colon_asv_tab, line = c(5000,10000))
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.

3.1.1 Sequencing depth

data_filt <- seq_depth_filtering(colon_asv_tab,
                                 colon_taxa_tab,
                                 colon_metadata,
                                 seq_depth_threshold = 10000)
Removing 308 ASV(s)
filt_colon_asv_tab <- data_filt[[1]]; alpha_colon_asv_tab <- filt_colon_asv_tab
filt_colon_taxa_tab <- data_filt[[2]]; alpha_colon_taxa_tab <- filt_colon_taxa_tab
filt_colon_metadata <- data_filt[[3]]; alpha_colon_metadata <- filt_colon_metadata

seq_step <- dim(filt_colon_asv_tab)[1]

Library size

read_counts(filt_colon_asv_tab,line = c(5000,10000))
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.

3.1.2 NearZeroVar

data_filt <- nearzerovar_filtering(filt_colon_asv_tab, 
                                   filt_colon_taxa_tab,
                                   filt_colon_metadata)

filt_colon_asv_tab <- data_filt[[1]]
filt_colon_taxa_tab <- data_filt[[2]]
nearzero_step <- dim(filt_colon_asv_tab)[1]

Library size

read_counts(filt_colon_asv_tab,line = c(5000,10000))
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.

3.1.3 Final Counts

final_counts_filtering(colon_asv_tab,
                       filt_colon_asv_tab,
                       filt_colon_metadata,
                       seq_step, 0, nearzero_step) %>% `colnames<-`("Count")
                                            Count
Raw data: ASVs                               4084
Raw data: Samples                             306
Sequencing depth filt: ASVs                  3776
Prevalence filt: ASVs                           0
NearZeroVar filt: ASVs                        342
Filt data: ASVs                               342
Filt data: Samples                            293
Filt data: Patients                           121
Filt data: Patients.1                           0
Filtered samples                               13
Matrices                    Cecum;Rectum;CD;CA;SI
healthy                                         0
non-rPSC                                        0
rPSC                                           73
pre_ltx                                       220
post_ltx                                        0
ETOH                                            0

3.2 Alpha diversity

path = "../../../results/merged_sites/main_analysis/Q5/alpha_diversity"

Calculation

# Construct MPSE object
alpha_colon_metadata$Sample <- alpha_colon_metadata$SampleID
colon_mpse <- as.MPSE(construct_phyloseq(alpha_colon_asv_tab,
                                         alpha_colon_taxa_tab,
                                         alpha_colon_metadata))

colon_mpse %<>% mp_rrarefy(raresize = 10000,seed = 123)

# Calculate alpha diversity - rarefied counts
colon_mpse %<>% mp_cal_alpha(.abundance=RareAbundance, force=TRUE)
# preparing data frame
alpha_data <- data.frame(SampleID=colon_mpse$Sample.x,
                         Observe=colon_mpse$Observe,
                         Shannon=colon_mpse$Shannon,
                         Simpson=colon_mpse$Simpson,
                         Pielou=colon_mpse$Pielou,
                         Group=colon_mpse$Calprotectin_groups,
                         Country=colon_mpse$Country,
                         Patient=colon_mpse$Patient)

write.csv(alpha_data,file.path(path,paste0("alpha_indices_",segment,".csv")),
          row.names = FALSE)

alpha_data %<>% drop_na()

3.2.1 Plots

p_boxplot_alpha <- alpha_diversity_countries(alpha_data,
                                             show_legend = FALSE,
                                             size=1,gap=FALSE)
Using SampleID, Group, Country, Patient as id variables
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
Using SampleID, Group, Country, Patient as id variables

Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
# save the results
alpha_div_plots[[paste(segment,"Country")]] <- p_boxplot_alpha

# see the results
p_boxplot_alpha

3.2.2 Linear Model

path = "../../../results/merged_sites/main_analysis/Q5/alpha_diversity"
alpha_data <- read.csv(file.path(path,paste0("alpha_indices_",segment,".csv")))

Richness

results_model <- pairwise.lmer(formula = "Observe ~ Group * Country +  (1|Patient)",
                             factors=alpha_data$Group,
                             data=alpha_data)

# check interaction
if (!is.data.frame(results_model)){
  results_model_observe <- results_model[[1]]
  results_model_observe_emeans <- results_model[[2]]
} else {
  results_model_observe <- results_model
  results_model_observe_emeans <- NA
}

# save the results
pc_observed[[segment]] <- results_model_observe
# see the results
knitr::kable(results_model_observe,digits = 3,
caption = "Raw results of linear model of richness estimation.")
Raw results of linear model of richness estimation.
Estimate Std..Error df t.value Pr…t.. p.adj sig
low vs Grouphigh -10.382 17.179 118.864 -0.604 0.547 0.547
low vs high - CZ vs NO -14.423 13.672 119.220 -1.055 0.294 0.294
low vs Grouphigh:CountryNO -10.672 22.099 117.864 -0.483 0.630 0.630
knitr::kable(results_model_observe_emeans,digits = 3,
caption = "Raw results of independent country analysis")

Table: Raw results of independent country analysis

Shannon

results_model <- pairwise.lmer(formula = "Shannon ~ Group * Country + (1|Patient)",
                             factors=alpha_data$Group,
                             data=alpha_data)

# check interaction
if (!is.data.frame(results_model)){
  results_model_shannon <- results_model[[1]]
  results_model_shannon_emeans <- results_model[[2]]
} else {
  results_model_shannon <- results_model
  results_model_shannon_emeans <- NA
}

# save the results
pc_shannon[[segment]] <- as.data.frame(results_model_shannon)
# see the results
knitr::kable(results_model_shannon,digits = 3,
caption = "Raw results of linear model of Shannon estimation.")
Raw results of linear model of Shannon estimation.
Estimate Std..Error df t.value Pr…t.. p.adj sig
low vs Grouphigh -0.203 0.238 119.622 -0.853 0.395 0.395
low vs high - CZ vs NO -0.259 0.190 120.062 -1.363 0.175 0.175
low vs Grouphigh:CountryNO -0.005 0.307 118.364 -0.015 0.988 0.988
knitr::kable(results_model_shannon_emeans,digits = 3,
caption = "Raw results of independent country analysis")

Table: Raw results of independent country analysis

Simpson

results_model <- pairwise.lmer(formula = "Simpson ~ Group * Country + (1|Patient)",
                                     factors=alpha_data$Group,
                                     data=alpha_data)

# check interaction
if (!is.data.frame(results_model)){
  results_model_simpson <- results_model[[1]]
  results_model_simpson_emeans <- results_model[[2]]
} else {
  results_model_simpson <- results_model
  results_model_simpson_emeans <- NA
}


# save the results
pc_simpson[[segment]] <- as.data.frame(results_model_simpson)
# see the results
knitr::kable(results_model_simpson,digits = 3,
caption = "Raw results of linear model of Simpson estimation.")
Raw results of linear model of Simpson estimation.
Estimate Std..Error df t.value Pr…t.. p.adj sig
low vs Grouphigh -0.035 0.040 119.722 -0.888 0.376 0.376
low vs high - CZ vs NO -0.038 0.032 120.159 -1.206 0.230 0.230
low vs Grouphigh:CountryNO 0.023 0.051 118.473 0.455 0.650 0.650
knitr::kable(results_model_simpson_emeans,digits = 3,
caption = "Raw results of independent country analysis")

Table: Raw results of independent country analysis

3.2.3 Saving results

alpha_list <- list(
  Richness=pc_observed[[segment]],
  Shannon=pc_shannon[[segment]])
                   
write.xlsx(alpha_list, 
           file = file.path(path,paste0("alpha_diversity_results_",segment,".xlsx")))

3.3 Beta diversity

Calculating Aitchison distance (euclidean distance on clr-transformed data), both at ASV and genus level.

3.3.1 Main analysis

Genus level, Aitchison distance

level="genus"
path = "../../../results/merged_sites/main_analysis/Q5/beta_diversity"

Aggregation, filtering

# Aggregation
genus_data <- aggregate_taxa(colon_asv_tab,
                             colon_taxa_tab,
                             taxonomic_level=level,
                             names=TRUE)
# Filtration
filt_data <- filtering_steps(genus_data[[1]],
                             genus_data[[2]],
                             colon_metadata,
                             seq_depth_threshold=10000)
Removing 32 ASV(s)
filt_colon_genus_tab <- filt_data[[1]]
filt_colon_genus_taxa <- filt_data[[2]]
filt_colon_metadata <- filt_data[[3]]

filt_colon_metadata %<>% dplyr::mutate(Group=Calprotectin_groups)
3.3.1.0.1 PERMANOVA
pairwise_df <- filt_colon_genus_tab %>% column_to_rownames("SeqID") %>% t()

# main effect
pp_main <- pairwise.adonis(pairwise_df,filt_colon_metadata$Group,
                           covariate = filt_colon_metadata$Country, 
                           patients = filt_colon_metadata$Patient,
                           sim.method = "robust.aitchison", p.adjust.m="BH")

# interaction
pp_int <- pairwise.adonis(pairwise_df,filt_colon_metadata$Group,
                          covariate = filt_colon_metadata$Country, 
                          interaction = TRUE, 
                          patients = filt_colon_metadata$Patient,
                          sim.method = "robust.aitchison", p.adjust.m="BH")

# tidy the results
pp_factor <- pp_main[[1]]
pp_cov <- pp_main[[2]]
pp_fac.cov <- pp_int[[3]]

cols <- c("pairs","Df","SumsOfSqs", "F.Model","R2","p.value", "p.adjusted", "sig")
colnames(pp_factor) <- cols; colnames(pp_cov) <- cols; colnames(pp_fac.cov) <- cols; 

# save raw results
pairwise_aitchison_raw[[paste(level, segment)]] <-rbind(pp_factor,pp_cov,pp_fac.cov)
# see the results
knitr::kable(pp_factor,digits = 3,caption = "PERMANOVA, GROUP separation")
PERMANOVA, GROUP separation
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
low vs high 1 318.485 2.059 0.007 0.857 0.857
knitr::kable(pp_cov,digits = 3,caption = "PERMANOVA, COUNTRY separation")
PERMANOVA, COUNTRY separation
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
low vs high , Country 1 661.475 4.277 0.014 0.003 0.003 **
knitr::kable(pp_fac.cov,digits = 3,caption = "PERMANOVA, INTERACTION GROUP:Country")
PERMANOVA, INTERACTION GROUP:Country
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
low vs high : Country 1 246.488 1.597 0.005 0.955 0.955

Interaction check

interaction_sig <- pp_fac.cov$pairs[pp_fac.cov$p.adjusted < 0.1]

if (length(interaction_sig)>0){
 for (i in 1:length(interaction_sig)){
  group1 <- unlist(strsplit(interaction_sig[i],split = " vs "))[1]
  group2 <- unlist(strsplit(interaction_sig[i],split = " vs "))[2]
  group2 <- unlist(strsplit(group2,split = " : "))[1]
  
  result_list <- adonis_postanalysis(x=pairwise_df,
                      factors = filt_colon_genus_metadata$Group,
                      covariate = filt_colon_genus_metadata$Country, 
                      group1 = group1,
                      group2 = group2,
                      patients = filt_colon_genus_metadata$Patient)
  print(result_list)
} 
}
3.3.1.0.2 Plots

PCoA custom

p <- pca_plot_custom(filt_colon_genus_tab,
                                 filt_colon_genus_taxa,
                                 filt_colon_metadata,
                                 show_boxplots = TRUE,
                                 variable = "Group", size=2, 
                                 show_legend=FALSE)

# save the results
pca_plots_list[[paste(segment,level,"custom")]] <- p

# see the results
p

3.3.1.1 Saving results

write.xlsx(pairwise_aitchison_raw[[paste(level, segment)]], 
           file = file.path(path,
           paste0("beta_diversity_results_", segment,".xlsx")))

3.4 Univariate Analysis

3.4.1 Main - Genus level

level="genus"
# needed paths
path = "../../../results/merged_sites/main_analysis/Q5/univariate_analysis"
path_maaslin=file.path("../../../intermediate_files/maaslin/Q5",level)
# variables
raw_linda_results_genus[[segment]] <- list()
linda_results_genus[[segment]] <- list()

# country and interaction problems
list_intersections <- list()
list_venns <- list()
uni_statistics <- list()

# workbook for final df
wb <- createWorkbook()

3.4.1.1 Genus level

level="genus"

Aggregate taxa

genus_data <- aggregate_taxa(colon_asv_tab,
                             colon_taxa_tab,
                             taxonomic_level = level)

colon_genus_tab <- genus_data[[1]]
colon_genus_taxa_tab <- genus_data[[2]]

colon_genus_asv_taxa_tab <- create_asv_taxa_table(colon_genus_tab,
                                                  colon_genus_taxa_tab)
3.4.1.1.1 low vs high
group <- c("low","high")
comparison_name <- paste0(group[1], " vs ",group[2])
3.4.1.1.1.1 linda
colon_metadata %<>% dplyr::mutate(Group=Calprotectin_groups)

# prepare the data
linda_data <- binomial_prep(colon_genus_tab,
                            colon_genus_taxa_tab,
                            colon_metadata,
                            group, usage="linDA")
Removing 25 ASV(s)
Removing 7 ASV(s)
filt_colon_uni_data <- linda_data[[1]]
filt_colon_uni_taxa <- linda_data[[2]]
filt_colon_uni_metadata <- linda_data[[3]]

# fit the model
linda.obj <- linda(filt_colon_uni_data,
                   filt_colon_uni_metadata,
                   formula = '~ Group * Country + (1|Patient)')
0  features are filtered!
The filtered data has  293  samples and  164  features will be tested!
Imputation approach is used.
Fit linear mixed effects models ...
Completed.
linda.output <- linda.obj$output
linda.output <- linda_renaming(linda.output, group)

# save the results
group1 <- paste0(group[1], " vs ","Group",group[2])
group2 <- paste0(group[1], " , ",group[2], " - CZ vs NO") 
group3 <- paste0(group[1], " vs ","Group",group[2], ":CountryNO")


for (grp in c(group1,group2,group3)){
  raw_linda_results_genus[[segment]][[grp]] <- 
    rawlinda.df(linda.output,
                grp,
                filt_colon_uni_data,
                filt_colon_uni_taxa)
  
  linda_results_genus[[segment]][[grp]] <- 
    linda.df(linda.output,
             grp,
             filt_colon_uni_data,
             filt_colon_uni_taxa)
}
# volcano plot
volcano_1 <- volcano_plot_linda(linda.output, group1, 
                                taxa_table = filt_colon_uni_taxa) + 
            ggtitle(paste(group,collapse=" vs "))

volcano_2  <- volcano_plot_linda(linda.output, group2, 
                                taxa_table = filt_colon_uni_taxa) + 
            ggtitle("NO vs CZ")

volcano_3 <- volcano_plot_linda(linda.output, group3, 
                                taxa_table = filt_colon_uni_taxa) +
            ggtitle("Interaction effect")

volcano <- ggarrange(volcano_1,volcano_2,volcano_3, ncol=3)

# see the plot
volcano

3.4.1.1.1.2 MaAsLin2
volcano1 <- volcano_plot_maaslin(fit_data,filt_colon_uni_taxa) + 
            ggtitle(paste(group[1], "vs", group[2]))

volcano2 <- volcano_plot_maaslin(fit_data,filt_colon_uni_taxa,variable="Country") + 
            ggtitle("Country effect")

volcano <- ggarrange(volcano1,volcano2, ncol=2)
volcano

3.4.1.1.1.3 Group - Intersection
intersection_results <- group_intersection(group, list_intersections, list_venns,
                                           linda.output, fit_data,
                                           raw_linda_results_genus, 
                                           segment = segment,
                                           level=level)

list_intersections <- intersection_results[[1]]
list_venns <- intersection_results[[2]]
venn <- intersection_results[[3]]

# show the results
venn

3.4.1.1.1.4 Interaction effect
list_interaction_significant <- country_interaction(group,
                                                    linda.output, 
                                                    list_intersections,
                                          filt_colon_uni_data,
                                          filt_colon_uni_metadata,
                                          segment=segment,
                                          level=level)

# see the result
## significant interaction effect
list_interaction_significant[[1]]
NULL
## results for czech cohort
list_interaction_significant[[2]]
[1] NA
## results for norwegian cohort
list_interaction_significant[[3]]
[1] NA

Removing problematic taxa

list_intersections <- removing_interaction_problems(group,
                                                    list_interaction_significant,
                                                    list_intersections,
                                                    segment=segment,
                                                    level=level)
3.4.1.1.2 Basic statistics
uni_df <-  merge(basic_univariate_statistics(linda_data,group),
                 raw_linda_results_genus[[segment]][[group1]],
                 by="SeqID",all=TRUE)
[1] "low"
[1] "high"
[1] "low"
[1] "high"
uni_df[["final_sig"]] <- uni_df$SeqID %in% list_intersections[[paste(segment,level,comparison_name)]][["SeqID"]]
uni_statistics[[segment]][[paste(level,comparison_name)]] <- uni_df

# for comparison
new_name <- comparison_name
addWorksheet(wb, sheetName = new_name)
writeData(wb, sheet = new_name, uni_df, rowNames=FALSE)
3.4.1.1.3 Visualization

Heatmap visualizing the linDA’s logFoldChange for taxa with p < 0.1.

list_heatmap <- list_intersections[grep(paste(segment,level),
                                  names(list_intersections),value=TRUE)]
if (length((list_heatmap[[1]][[1]]))>1){
p_heatmap_linda <- heatmap_linda(list_heatmap,colon_taxa_tab)
p_heatmap_linda
}

Dot heatmap

if (length((list_heatmap[[1]][[1]]))>0){
dotheatmap_linda <- dot_heatmap_linda(list_heatmap,
                                      uni_statistics$terminal_ileum[grepl(level,names(uni_statistics$terminal_ileum))],
                                      colon_taxa_tab)
dotheatmap_linda
}

Horizontal bar plot

if (length((list_heatmap[[1]][[1]]))>0){
p_prevalence <- horizontal_barplot(wb,taxa=levels(dotheatmap_linda$data$SeqID))

p_prevalence_final <- ggarrange(p_prevalence,
                                ggplot() + theme_minimal(),
                                nrow = 2,heights = c(1,0.085))
p <- ggarrange(dotheatmap_linda + theme(legend.position = "none"),p_prevalence_final,ncol=2,widths = c(1,0.2))
p
}

3.4.1.2 Saving results

# ALL DATA
saveWorkbook(wb,file.path(path,paste0("uni_analysis_wb_",segment,".xlsx")),
             overwrite = TRUE)

# SIGNIFICANT taxa

write.xlsx(list_intersections[grepl(segment,names(list_intersections))] %>%
            `names<-`(gsub(segment, "", names(
              list_intersections[grepl(segment,names(list_intersections))]))),
           file.path(path,paste0("significant_taxa_",segment,".xlsx")))

3.5 Machine learning

path = "../../../results/merged_sites/main_analysis/Q5/models"

3.5.1 ElasticNet

model="enet"

3.5.1.1 ASV level

level="ASV"

#####low vs high

group <- c("low","high")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_asv_tab,
                                     colon_taxa_tab,
                                     colon_metadata,
                                     group, usage="ml_clr",patient = TRUE)
Removing 264 ASV(s)
Removing 44 ASV(s)
# fit the model
enet_model <- glmnet_binomial(filt_colon_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=TRUE,
                              file=model_name,clust_var = "Patient",
                              Q="Q5")

# ROC curve
roc_c <- roc_curve(enet_model, group)

# save the results
models_summ <- list()
models_cm <- list()
betas <- list()
roc_cs <- list()

models_summ[[model_name]] <- enet_model$model_summary
models_cm[[model_name]] <- enet_model$conf_matrices$original
roc_cs[[model_name]] <- enet_model$kfold_rocobjs
betas[[model_name]] <- as.matrix(enet_model$betas)

# see the results
enet_model$model_summary %>% t()
                                       [,1]
alpha                           1.000000000
lambda                          0.002352995
auc                             1.000000000
auc_czech                       1.000000000
auc_no                          1.000000000
auc_optimism_corrected          0.576388400
auc_optimism_corrected_CIL      0.418467650
auc_optimism_corrected_CIU      0.690289785
accuracy                        1.000000000
accuracy_czech                          NaN
accuracy_no                     1.000000000
accuracy_optimism_corrected     0.640802202
accuracy_optimism_corrected_CIL 0.528153409
accuracy_optimism_corrected_CIU 0.733313084
enet_model$conf_matrices
$original
    Predicted
True   0   1
   0  83   0
   1   0 210

$czech
    Predicted
True  0  1
   0 36  0
   1  0 30

$no
    Predicted
True   0   1
   0  47   0
   1   0 180
enet_model$plot
`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'

roc_c

3.5.1.2 Genus level

level="genus"

Aggregate taxa

genus_data <- aggregate_taxa(colon_asv_tab,
                             colon_taxa_tab,
                             taxonomic_level = level)

colon_genus_tab <- genus_data[[1]]
colon_genus_taxa_tab <- genus_data[[2]]
3.5.1.2.1 low vs high
group <- c("low","high")
comparison_name <- paste0(group[1], " vs ",group[2])
model_name <- paste(comparison_name,level,segment)

# prepare the data
filt_colon_uni_data <- binomial_prep(colon_genus_tab,
                                     colon_genus_taxa_tab,
                                     colon_metadata,
                                     group, 
                                     usage="ml_clr",patient = TRUE)
Removing 25 ASV(s)
Removing 7 ASV(s)
# fit the model
enet_model <- glmnet_binomial(filt_colon_uni_data,
                              sample_method = "atypboot",
                              outcome="Group", 
                              N=10,
                              reuse=TRUE,clust_var = "Patient",
                              file=model_name,
                              Q="Q5")

# ROC
roc_c <- roc_curve(enet_model, group)

# save the results
models_summ[[model_name]] <- enet_model$model_summary
models_cm[[model_name]] <- enet_model$conf_matrices$original
roc_cs[[model_name]] <- enet_model$kfold_rocobjs
betas[[model_name]] <- as.matrix(enet_model$betas)

# see the results
enet_model$model_summary %>% t()
                                      [,1]
alpha                           0.20000000
lambda                          0.01770854
auc                             1.00000000
auc_czech                       1.00000000
auc_no                          1.00000000
auc_optimism_corrected          0.61887455
auc_optimism_corrected_CIL      0.50314091
auc_optimism_corrected_CIU      0.73986549
accuracy                        0.99658703
accuracy_czech                         NaN
accuracy_no                     0.99559471
accuracy_optimism_corrected     0.66245735
accuracy_optimism_corrected_CIL 0.55821114
accuracy_optimism_corrected_CIU 0.72898906
enet_model$conf_matrices
$original
    Predicted
True   0   1
   0  82   1
   1   0 210

$czech
    Predicted
True  0  1
   0 36  0
   1  0 30

$no
    Predicted
True   0   1
   0  46   1
   1   0 180
enet_model$plot
`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'

roc_c

3.5.1.3 Saving results

models_summ_df_colon <- do.call(rbind, 
  models_summ[grep(segment,names(models_summ),value = TRUE)])

write.csv(models_summ_df_colon,file.path(path,paste0("elastic_net_",segment,".csv")))

3.6 Results overview

3.6.0.1 Alpha diversity

pc_observed[[segment]]
                            Estimate Std..Error       df    t.value  Pr...t..
low vs Grouphigh           -10.38203   17.17915 118.8636 -0.6043391 0.5467700
low vs high - CZ vs NO     -14.42260   13.67170 119.2200 -1.0549238 0.2935933
low vs Grouphigh:CountryNO -10.67238   22.09860 117.8642 -0.4829439 0.6300314
                               p.adj sig
low vs Grouphigh           0.5467700    
low vs high - CZ vs NO     0.2935933    
low vs Grouphigh:CountryNO 0.6300314    
pc_shannon[[segment]]
                               Estimate Std..Error       df     t.value
low vs Grouphigh           -0.203414982  0.2384651 119.6223 -0.85301770
low vs high - CZ vs NO     -0.258723672  0.1898142 120.0617 -1.36303680
low vs Grouphigh:CountryNO -0.004629771  0.3065856 118.3638 -0.01510107
                            Pr...t..     p.adj sig
low vs Grouphigh           0.3953543 0.3953543    
low vs high - CZ vs NO     0.1754218 0.1754218    
low vs Grouphigh:CountryNO 0.9879770 0.9879770    
pc_simpson[[segment]]
                              Estimate Std..Error       df    t.value  Pr...t..
low vs Grouphigh           -0.03519075 0.03962315 119.7223 -0.8881361 0.3762484
low vs high - CZ vs NO     -0.03804238 0.03153914 120.1589 -1.2061961 0.2301115
low vs Grouphigh:CountryNO  0.02320291 0.05094297 118.4727  0.4554683 0.6496060
                               p.adj sig
low vs Grouphigh           0.3762484    
low vs high - CZ vs NO     0.2301115    
low vs Grouphigh:CountryNO 0.6496060    

Plots

alpha_div_plots[[paste(segment,"Country")]]

3.6.0.2 Beta diversity

Main results

pairwise_aitchison_raw[[paste("genus", segment)]]
                  pairs Df SumsOfSqs  F.Model          R2 p.value p.adjusted
1           low vs high  1  318.4846 2.059227 0.006943927   0.857      0.857
2 low vs high , Country  1  661.4748 4.276901 0.014422151   0.003      0.003
3 low vs high : Country  1  246.4880 1.596999 0.005374183   0.955      0.955
  sig
1    
2  **
3    

PCA

pca_plots_list[[paste(segment,"genus custom")]]

3.6.0.3 Univariate analysis

Number of significant taxa

knitr::kable(as.data.frame(lapply(list_intersections,nrow)) %>% t() %>% 
  `colnames<-`("Count") %>% 
  `rownames<-`(c(names(list_intersections))),caption="Number of significant taxa")
Number of significant taxa
Count
colon genus low vs high 0

3.6.0.4 Machine learning

Main models

Summary

knitr::kable(models_summ_df_colon %>% dplyr::select(
"alpha","lambda",
"auc_optimism_corrected",
"auc_optimism_corrected_CIL",
"auc_optimism_corrected_CIU"),
             digits=3,caption="Elastic net results")
Elastic net results
alpha lambda auc_optimism_corrected auc_optimism_corrected_CIL auc_optimism_corrected_CIU
low vs high ASV colon 1.0 0.002 0.576 0.418 0.69
low vs high genus colon 0.2 0.018 0.619 0.503 0.74

ROC - Genus level

p <- roc_curve_all_custom(roc_cs[2],Q="Q5",
                     model_name="enet_model")

p

4 Paper-ready visualizations

Alpha diversity

p_A <- alpha_div_plots$`terminal_ileum Country` +
  ggtitle("Terminal ileum")+
  theme(plot.title = element_text(hjust=0.5,face = "bold",size = 15)) 

p_B <-  alpha_div_plots$`colon Country` +
  ggtitle("Colon") +
  theme(plot.title = element_text(hjust=0.5,face = "bold",size = 15)) 

Q5_alpha <- ggarrange(p_A,ggplot() + theme_minimal(),p_B,nrow=1, ncol=3,
                      widths = c(1,0.1,1))
Q5_alpha

Beta diversity

pca_ti <- pca_plots_list$`terminal_ileum genus custom` 
pca_colon <- pca_plots_list$`colon genus custom` 

genus_Q5_beta <- ggarrange(pca_ti,
                           ggplot() + theme_minimal(),
                           pca_colon,ncol=3,
                           widths = c(1,0.1,1))
genus_Q5_beta

save(Q5_alpha,genus_Q5_beta,file="../../../results/merged_sites/main_analysis/Q5/figures.RData")

Alpha + Beta diversity

alpha_beta <- ggarrange(Q5_alpha,genus_Q5_beta,
                        ncol = 1,nrow=2,labels = c("A","B"))
alpha_beta

4.1 Supplement Figure

alpha_beta <- ggarrange(alpha_beta,ggplot() + theme_minimal(),ncol=2,
                                widths = c(1,0.15))

alpha_beta

Q3+Q5

load("../../../results/merged_sites/main_analysis/Q3/figures.RData")
p <- ggarrange(Q3_alpha,Q5_alpha,
               genus_Q3_beta,genus_Q5_beta,ncol=1,labels = LETTERS)
p <- ggarrange(p,ncol=2,widths = c(1,0.15))

p

DAA

if (exists("dot_heatmap_ileum")){
 p_ileum <- dot_heatmap_ileum + 
  ggtitle("Terminal ileum") +
  theme(plot.title = element_text(hjust=0.5,face = "bold",size = 15),
        legend.position = "none") 
}

if (exists("dot_heatmap_colon")){
  p_colon <- dot_heatmap_colon  +
  ggtitle("Colon") +
  theme(plot.title = element_text(hjust=0.5,face = "bold",size = 15),
        legend.position = "none")
}

if (exists("p_ileum") | exists("p_colon")){
  heatmap_plot <- ggarrange(p_ileum,
                          p_colon,
                          ncol = 2,labels=c("A","B"))
  heatmap_plot
}

5 Session info

sessionInfo()
R version 4.3.1 (2023-06-16 ucrt)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 11 x64 (build 26100)

Matrix products: default


locale:
[1] LC_COLLATE=Czech_Czechia.utf8  LC_CTYPE=Czech_Czechia.utf8   
[3] LC_MONETARY=Czech_Czechia.utf8 LC_NUMERIC=C                  
[5] LC_TIME=Czech_Czechia.utf8    

time zone: Europe/Prague
tzcode source: internal

attached base packages:
[1] parallel  stats     graphics  grDevices utils     datasets  methods  
[8] base     

other attached packages:
 [1] mice_3.17.0              picante_1.8.2            ape_5.8-1               
 [4] lubridate_1.9.3          forcats_1.0.0            stringr_1.5.1           
 [7] tidyverse_2.0.0          kableExtra_1.4.0         tidyr_1.3.1             
[10] gbm_2.2.2                doParallel_1.0.17        iterators_1.0.14        
[13] foreach_1.5.2            ranger_0.17.0            ggvenn_0.1.15           
[16] Maaslin2_1.16.0          purrr_1.0.2              pROC_1.18.5             
[19] glmnet_4.1-8             MicrobiomeStat_1.2       caret_6.0-94            
[22] openxlsx_4.2.8           magrittr_2.0.3           emmeans_1.11.2-8        
[25] lmerTest_3.1-3           robustlmm_3.3-1          lme4_1.1-37             
[28] Matrix_1.6-5             mgcv_1.9-1               nlme_3.1-164            
[31] pheatmap_1.0.13          reshape2_1.4.4           vegan_2.6-10            
[34] lattice_0.22-6           permute_0.9-8            ggplotify_0.1.2         
[37] ggrepel_0.9.6            ggpubr_0.6.1             MicrobiotaProcess_1.14.1
[40] phyloseq_1.46.0          ggplot2_3.5.2            tibble_3.2.1            
[43] dplyr_1.1.4              cowplot_1.2.0            readr_2.1.5             
[46] igraph_2.1.4             ccrepe_1.38.1            data.table_1.15.4       

loaded via a namespace (and not attached):
  [1] fs_1.6.5                    matrixStats_1.5.0          
  [3] bitops_1.0-9                RColorBrewer_1.1-3         
  [5] numDeriv_2016.8-1.1         tools_4.3.1                
  [7] backports_1.4.1             utf8_1.2.4                 
  [9] R6_2.6.1                    lazyeval_0.2.2             
 [11] jomo_2.7-6                  rhdf5filters_1.14.1        
 [13] withr_3.0.2                 gridExtra_2.3              
 [15] cli_3.6.2                   Biobase_2.62.0             
 [17] logging_0.10-108            biglm_0.9-3                
 [19] sandwich_3.1-1              labeling_0.4.3             
 [21] mvtnorm_1.3-3               robustbase_0.99-4-1        
 [23] pbapply_1.7-4               systemfonts_1.2.2          
 [25] yulab.utils_0.2.1           svglite_2.1.3              
 [27] parallelly_1.38.0           rstudioapi_0.17.1          
 [29] generics_0.1.4              gridGraphics_0.5-1         
 [31] shape_1.4.6.1               car_3.1-3                  
 [33] zip_2.3.2                   biomformat_1.30.0          
 [35] S4Vectors_0.40.2            abind_1.4-8                
 [37] infotheo_1.2.0.1            lifecycle_1.0.4            
 [39] multcomp_1.4-28             yaml_2.3.10                
 [41] carData_3.0-5               SummarizedExperiment_1.32.0
 [43] Rtsne_0.17                  rhdf5_2.46.1               
 [45] recipes_1.3.1               SparseArray_1.2.4          
 [47] grid_4.3.1                  mitml_0.4-5                
 [49] crayon_1.5.3                pillar_1.11.0              
 [51] knitr_1.50                  optparse_1.7.5             
 [53] GenomicRanges_1.54.1        statip_0.2.3               
 [55] boot_1.3-32                 estimability_1.5.1         
 [57] future.apply_1.11.3         codetools_0.2-20           
 [59] pan_1.9                     glue_1.7.0                 
 [61] ggfun_0.2.0                 vctrs_0.6.5                
 [63] treeio_1.26.0               Rdpack_2.6.4               
 [65] gtable_0.3.6                gower_1.0.1                
 [67] xfun_0.52                   rbibutils_2.3              
 [69] S4Arrays_1.2.1              prodlim_2024.06.25         
 [71] libcoin_1.0-10              coda_0.19-4.1              
 [73] reformulas_0.4.1            pcaPP_2.0-5                
 [75] modeest_2.4.0               survival_3.5-8             
 [77] timeDate_4041.110           hardhat_1.4.2              
 [79] lava_1.8.1                  statmod_1.5.0              
 [81] TH.data_1.1-4               ipred_0.9-15               
 [83] ggtree_3.10.1               GenomeInfoDb_1.38.8        
 [85] fBasics_4041.97             rpart_4.1.23               
 [87] colorspace_2.1-1            BiocGenerics_0.48.1        
 [89] DBI_1.2.3                   nnet_7.3-19                
 [91] ade4_1.7-23                 tidyselect_1.2.1           
 [93] timeSeries_4041.111         compiler_4.3.1             
 [95] microbiome_1.24.0           xml2_1.3.8                 
 [97] DelayedArray_0.28.0         scales_1.4.0               
 [99] DEoptimR_1.1-4              spatial_7.3-18             
[101] rappdirs_0.3.3              digest_0.6.35              
[103] minqa_1.2.8                 rmarkdown_2.29             
[105] XVector_0.42.0              htmltools_0.5.8.1          
[107] pkgconfig_2.0.3             MatrixGenerics_1.14.0      
[109] stabledist_0.7-2            fastmap_1.2.0              
[111] rlang_1.1.3                 htmlwidgets_1.6.4          
[113] farver_2.1.2                zoo_1.8-13                 
[115] jsonlite_2.0.0              ModelMetrics_1.2.2.2       
[117] RCurl_1.98-1.17             modeltools_0.2-24          
[119] Formula_1.2-5               GenomeInfoDbData_1.2.11    
[121] patchwork_1.3.2             Rhdf5lib_1.24.2            
[123] Rcpp_1.0.12                 ggnewscale_0.5.2           
[125] fastGHQuad_1.0.1            stringi_1.8.3              
[127] ggstar_1.0.4                stable_1.1.6               
[129] zlibbioc_1.48.2             MASS_7.3-60.0.1            
[131] plyr_1.8.9                  listenv_0.9.1              
[133] Biostrings_2.70.3           splines_4.3.1              
[135] hash_2.2.6.3                multtest_2.58.0            
[137] hms_1.1.3                   ggtreeExtra_1.12.0         
[139] ggsignif_0.6.4              stats4_4.3.1               
[141] rmutil_1.1.10               evaluate_1.0.5             
[143] nloptr_2.2.1                tzdb_0.5.0                 
[145] getopt_1.20.4               future_1.34.0              
[147] clue_0.3-66                 coin_1.4-3                 
[149] broom_1.0.9                 xtable_1.8-4               
[151] tidytree_0.4.6              rstatix_0.7.2              
[153] viridisLite_0.4.2           class_7.3-22               
[155] aplot_0.2.8                 IRanges_2.36.0             
[157] cluster_2.1.8.1             timechange_0.3.0           
[159] globals_0.18.0