Geography-independent mucosal microbiota alterations in primary sclerosing cholangitis persist after liver transplantation

3. Hypothesis: PSC disease is associated with IBD


1 Introduction

1.1 About

An IBD vs. no-IBD comparison was performed within PSC patients (pre-LTx and rPSC individuals combined) to discover IBD-specific pattern.

IBD vs no_IBD analysis on merged data

  • Alpha diversity –> group effect, country effect, interaction effect

  • Beta diversity – PERMANOVA, PCA -> group effect, country effect, interaction effect

  • DAA ->

    • o Group effect – linDA + MaAsLin2 intersection

    • o Country effect – – taxa with significant interaction effect were excluded based on individual post-hoc analysis. Taxa had to have the same direction in both countries

IBD vs no_IBD – finding IBD-specific pattern in PSC patients

Importing libraries and custom functions built for this analysis

source("custom_functions.R")

1.2 Data Import

Importing ASV, taxa and metadata tables for both Czech and Norway samples.

Czech

MICROBIOME

path = "../../../../data/analysis_ready_data/ikem/"
asv_tab_ikem <- as.data.frame(fread(file.path(path,"asv_table_ikem.csv"),
                                    check.names = FALSE))
taxa_tab_ikem <- as.data.frame(fread(file.path(path,"taxa_table_ikem.csv"),
                                     check.names = FALSE))
metadata_ikem <- as.data.frame(fread(file.path(path,"metadata_ikem.csv"),
                                     check.names = FALSE))

IBD info

path = "../../../../data/clinical_data/"
cz <- read.csv(file.path(path,"clinical_metadata_cz.csv")) %>% dplyr::select(SampleID,PatientID,PSC_IBD,Group)

cz$PatientID <- as.character(cz$PatientID)
metadata_ikem <- merge(metadata_ikem,cz,by=c("SampleID", "Group"),all=TRUE)

Norway

MICROBIOME

path = "../../../../data/analysis_ready_data/norway/"
asv_tab_norway <- as.data.frame(fread(file.path(path,"asv_table_norway.csv"),
                                    check.names = FALSE))
taxa_tab_norway <- as.data.frame(fread(file.path(path,"taxa_table_norway.csv"),
                                    check.names = FALSE))
metadata_norway <- as.data.frame(fread(file.path(path,"metadata_norway.csv"),
                                    check.names = FALSE))

IBD info

path = "../../../../data/clinical_data/"
no <- read.csv(file.path(path,"clinical_metadata_no.csv")) %>% dplyr::select(SampleID,subjectid,IBD, Group)

1.2.1 Merging

Merging clinical metadata

clinical_metadata <- bind_rows(
  cz %>% dplyr::mutate(PSC_IBD = case_when(
    PSC_IBD == "0" ~ "no_ibd",
    PSC_IBD == "1" ~ "ibd",
  )) %>% `colnames<-`(c("SampleID","PatientID","IBD","Group")),
  no %>% `colnames<-`(c("SampleID","PatientID","IBD","Group")) %>%
    mutate(PatientID=paste0("NO_",PatientID)))

Terminal ileum

ileum_data <- merging_data(asv_tab_1=asv_tab_ikem,
                           asv_tab_2=asv_tab_norway,
                           taxa_tab_1=taxa_tab_ikem,
                           taxa_tab_2=taxa_tab_norway,
                           metadata_1=metadata_ikem,
                           metadata_2=metadata_norway,
                           segment="TI",Q="Q3")
Removing 1498 ASV(s)
Removing 1834 ASV(s)
Merging at ASV level
Finding inconsistencies in taxonomy, trying to keep the ones that have better taxonomy assignment
Removing 2296 ASV(s)
ileum_asv_tab <- ileum_data[[1]]
ileum_taxa_tab <- ileum_data[[2]]
ileum_metadata <- ileum_data[[3]]

ileum_metadata <- merge(ileum_metadata,
                        clinical_metadata %>%
                          dplyr::select(-Group),
                        by=c("SampleID"), all.x = TRUE) %>%
  dplyr::select(-PatientID,Group) %>% 
  dplyr::mutate(Group=IBD) %>% dplyr::select(-IBD)

Colon

colon_data <- merging_data(asv_tab_1=asv_tab_ikem,
                           asv_tab_2=asv_tab_norway,
                           taxa_tab_1=taxa_tab_ikem,
                           taxa_tab_2=taxa_tab_norway,
                           metadata_1=metadata_ikem,
                           metadata_2=metadata_norway,
                           segment="colon",Q="Q3")
Removing 739 ASV(s)
Removing 266 ASV(s)
Merging at ASV level
Finding inconsistencies in taxonomy, trying to keep the ones that have better taxonomy assignment
Removing 2852 ASV(s)
colon_asv_tab <- colon_data[[1]]
colon_taxa_tab <- colon_data[[2]]
colon_metadata <- colon_data[[3]]

colon_metadata <- merge(colon_metadata,
                        clinical_metadata %>%
                          dplyr::select(-Group),
                        by=c("SampleID"), all.x = TRUE) %>%
  dplyr::select(-PatientID,Group) %>% 
  dplyr::mutate(Group=IBD) %>% dplyr::select(-IBD)

1.3 Import clinical data

1.3.1 CZ

# clinical metadata
metadata_clinical <- read.csv("../../../../data/clinical_data/clinical_metadata_cz.csv")
metadata_clinical$PatientID <- as.character(metadata_clinical$PatientID)

# DYSBIOSIS
metadata_dysbiosis <- read.csv("../../../../data/clinical_data/dysbiosis_metadata.csv") %>%
  dplyr::filter(Country=="CZ")

list.files("../../../results/merged_sites/main_analysis/Q1/alpha_diversity/")
[1] "alpha_diversity_results_colon.xlsx"         
[2] "alpha_diversity_results_terminal_ileum.xlsx"
[3] "alpha_indices_colon.csv"                    
[4] "alpha_indices_terminal_ileum.csv"           
# ALPHA DIVERSITY
metadata_alpha_ileum <- read.csv(
  "../../../results/merged_sites/main_analysis/Q1/alpha_diversity/alpha_indices_terminal_ileum.csv") %>%
  dplyr::filter(Country=="CZ")

metadata_alpha_colon <- read.csv(
  "../../../results/merged_sites/main_analysis/Q1/alpha_diversity/alpha_indices_colon.csv") %>%
  dplyr::filter(Country=="CZ")

metadata_alpha <- rbind(metadata_alpha_ileum,metadata_alpha_colon) %>%
  mutate(PatientID=Patient) %>%
  dplyr::select(-c(Patient, Group))

# MERGING
metadata_cz <- full_join(metadata_clinical, metadata_dysbiosis, by=c("SampleID","Matrix","PatientID","Group","Country"))

metadata_cz <- full_join(metadata_cz, metadata_alpha, by=c("SampleID","PatientID","Country"))

metadata_cz$Group <- factor(metadata_cz$Group,levels = c("healthy","pre_ltx","non-rPSC","rPSC"))

1.3.2 NO

# clinical metadata
metadata_clinical <- read.csv("../../../../data/clinical_data/clinical_metadata_no.csv")
metadata_clinical %<>% mutate(PatientID=subjectid,
                              Matrix=segment) %>%
  dplyr::select(-subjectid,-segment)
metadata_clinical$PatientID <- as.character(paste0("NO_",metadata_clinical$PatientID))

# DYSBIOSIS
metadata_dysbiosis <- read.csv("../../../../data/clinical_data/dysbiosis_metadata.csv") %>%
  dplyr::filter(Country=="NO")  %>%
  dplyr::select(-c(Patient))

# ALPHA DIVERSITY
metadata_alpha_ileum <- read.csv(
  "../../../results/merged_sites/main_analysis/Q1/alpha_diversity/alpha_indices_terminal_ileum.csv") %>%
  dplyr::filter(Country=="NO")

metadata_alpha_colon <- read.csv(
  "../../../results/merged_sites/main_analysis/Q1/alpha_diversity/alpha_indices_colon.csv") %>%
  dplyr::filter(Country=="NO")

metadata_alpha <- rbind(metadata_alpha_ileum,metadata_alpha_colon) %>%
  mutate(PatientID=Patient) %>%
  dplyr::select(-c(Patient, Group))

# MERGING
metadata_no <- full_join(metadata_clinical, metadata_dysbiosis, by=c("SampleID","Matrix","PatientID","Group","Country"))

metadata_no <- full_join(metadata_no, metadata_alpha, by=c("SampleID","PatientID","Country"))

metadata_no$Group <- factor(metadata_no$Group,levels = c("healthy","pre_ltx","non-rPSC","rPSC"))

1.3.3 Merging metadata

metadata_cz %<>% dplyr::mutate(Calprotectin=Fecal.calprotectin,
                             AOM=AOM_score,
                             APRI=APRI_score,
                             FIB4=FIB4_score) %>%
  dplyr::select(SampleID,Matrix,PatientID,Group,Country,Bilirubin,ALP,Calprotectin,
                MAYO_PSC,AOM,APRI,FIB4,Platelets,AST,Creatinine,
                Albumin,ALT,PSC_IBD,GGT,INR,Albumin,Nancy_max,eMAYO,MAYO_dai,
                dys_unfiltered_asv,dys_unfiltered_genus,
                dys_filtered_asv,dys_filtered_genus,
                Observe,Shannon,Simpson,Pielou)

metadata_no %<>% dplyr::mutate(Platelets=TRC,
                             Creatinine=Kreatinin,
                             MAYO_PSC=Mayo_score,
                             AST=ASAT/60,
                             ALT=ALAT/60,
                             PSC_IBD=IBD,
                             Bilirubin=Bilirubin*17.1,
                             ALP=ALP/60,
                             Albumin=Albumin*10) %>%
  dplyr::mutate(PSC_IBD = case_when(
      PSC_IBD == "no_ibd" ~ "0",
      PSC_IBD == "ibd" ~ "1",
      TRUE ~ Group  # Keep other values as is
    )) %>%
  dplyr::select(SampleID,Matrix,PatientID,Group,Country,Bilirubin,ALP,Calprotectin,
                MAYO_PSC,AOM,APRI,FIB4,Platelets,AST,Creatinine,
                Albumin,ALT,PSC_IBD,
                dys_unfiltered_asv,dys_unfiltered_genus,
                dys_filtered_asv,dys_filtered_genus,
                Observe,Shannon,Simpson,Pielou) 

metadata_final <- merge(metadata_cz,metadata_no,all = TRUE)
metadata_final$Calprotectin[metadata_final$Calprotectin==">6000"] <- 6000
metadata_final$Calprotectin <- as.numeric(metadata_final$Calprotectin)

1.4 CHI SQUARE TEST

1.4.1 pre_LTx, rPSC, non-rPSC

cz <- cz %>% dplyr::group_by(PSC_IBD,Group) %>%
  distinct(PatientID, .keep_all = TRUE) %>%
  count() %>% drop_na() %>% 
  dplyr::mutate(PSC_IBD = case_when(
    PSC_IBD == "0" ~ "no_ibd",
    PSC_IBD == "1" ~ "ibd",
  )) %>% `colnames<-`(c("IBD","Group","n")) 

no <- no %>% dplyr::group_by(IBD,Group) %>%
  distinct(subjectid, .keep_all = TRUE) %>%
  count() %>% drop_na()

# Summarize by IBD and Group
data <- bind_rows(cz, no) %>%
  group_by(IBD, Group) %>%
  summarise(n = sum(n), .groups = "drop") %>%
  pivot_wider(names_from = Group, values_from = n) %>%
  column_to_rownames("IBD") %>%
  as.matrix()
    
chi_res <- chisq.test(data)
chi_res$expected
        non-rPSC  pre_ltx      rPSC
ibd    101.12774 96.11314 31.759124
no_ibd  19.87226 18.88686  6.240876
chi_res$observed
       non-rPSC pre_ltx rPSC
ibd         103      91   35
no_ibd       18      24    3
chi_res

    Pearson's Chi-squared test

data:  data
X-squared = 3.881, df = 2, p-value = 0.1436

1.4.2 pre_LTx + rPSC (PSC patients), non-rPSC

data2 <- data %>% as.data.frame() %>%
  dplyr::mutate(PSC=pre_ltx+rPSC) %>% 
  dplyr::select(PSC,`non-rPSC`) %>% as.matrix()

chi_res <- chisq.test(data2)
chi_res$expected
             PSC  non-rPSC
ibd    127.87226 101.12774
no_ibd  25.12774  19.87226
chi_res$observed
       PSC non-rPSC
ibd    126      103
no_ibd  27       18
chi_res

    Pearson's Chi-squared test with Yates' continuity correction

data:  data2
X-squared = 0.20305, df = 1, p-value = 0.6523

1.4.3 rPSC, non-rPSC

data3 <- data %>% as.data.frame() %>%
  dplyr::select(rPSC,`non-rPSC`) %>% as.matrix()

chi_res <- chisq.test(data3)
chi_res$expected
            rPSC  non-rPSC
ibd    32.981132 105.01887
no_ibd  5.018868  15.98113
chi_res$observed
       rPSC non-rPSC
ibd      35      103
no_ibd    3       18
chi_res

    Pearson's Chi-squared test with Yates' continuity correction

data:  data3
X-squared = 0.69593, df = 1, p-value = 0.4042

2 Data Analysis - Terminal ileum

segment="terminal_ileum"

2.1 Filtering

Rules:

  • sequencing depth > 10000

  • nearZeroVar() with default settings

Rarefaction Curve

path="../../../intermediate_files/rarecurves"
seq_depth_threshold <- 10000
ps <- construct_phyloseq(ileum_asv_tab,ileum_taxa_tab,ileum_metadata)
rareres <- get_rarecurve(obj=ps, chunks=500)
save(rareres,file = file.path(path,"rarefaction_ileum.Rdata"))
load(file.path(path,"rarefaction_ileum.Rdata"))

prare <- ggrarecurve(obj=rareres,
                      factorNames="Country",
                      indexNames=c("Observe")) + 
  theme_bw() +
  theme(axis.text=element_text(size=8), panel.grid=element_blank(),
        strip.background = element_rect(colour=NA,fill="grey"),
        strip.text.x = element_text(face="bold")) + 
  geom_vline(xintercept = seq_depth_threshold, 
             linetype="dashed", 
             color = "red") + 
  xlim(0, 20000)
The color has been set automatically, you can reset it manually by adding scale_color_manual(values=yourcolors)
prare

Library size

read_counts(ileum_asv_tab, line = c(5000,10000))
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.

2.1.1 Sequencing depth

data_filt <- seq_depth_filtering(ileum_asv_tab,
                                 ileum_taxa_tab,
                                 ileum_metadata,
                                 seq_depth_threshold = 10000)
Removing 82 ASV(s)
filt_ileum_asv_tab <- data_filt[[1]]; alpha_ileum_asv_tab <- filt_ileum_asv_tab
filt_ileum_taxa_tab <- data_filt[[2]]; alpha_ileum_taxa_tab <- filt_ileum_taxa_tab
filt_ileum_metadata <- data_filt[[3]]; alpha_ileum_metadata <- filt_ileum_metadata

seq_step <- dim(filt_ileum_asv_tab)[1]

Library size

read_counts(filt_ileum_asv_tab,line = c(5000,10000))
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.

2.1.2 NearZeroVar

data_filt <- nearzerovar_filtering(filt_ileum_asv_tab, 
                                   filt_ileum_taxa_tab,
                                   filt_ileum_metadata)

filt_ileum_asv_tab <- data_filt[[1]]
filt_ileum_taxa_tab <- data_filt[[2]]
nearzero_step <- dim(filt_ileum_asv_tab)[1]

Library size

read_counts(filt_ileum_asv_tab,line = c(5000,10000))
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.

2.1.3 Final Counts

final_counts_filtering(ileum_asv_tab,
                       filt_ileum_asv_tab,
                       filt_ileum_metadata,
                       seq_step, 0, nearzero_step) %>% `colnames<-`("Count")
                            Count
Raw data: ASVs               2836
Raw data: Samples             108
Sequencing depth filt: ASVs  2754
Prevalence filt: ASVs           0
NearZeroVar filt: ASVs        994
Filt data: ASVs               994
Filt data: Samples            103
Filt data: Patients           103
Filt data: Patients.1           0
Filtered samples                5
Matrices                       TI
healthy                         0
non-rPSC                        0
rPSC                            0
pre_ltx                         0
post_ltx                        0
ETOH                            0

2.2 Alpha diversity

path = "../../../results/merged_sites/main_analysis/Q3/alpha_diversity"

Calculation

# Construct MPSE object
alpha_ileum_metadata$Sample <- alpha_ileum_metadata$SampleID
ileum_mpse <- as.MPSE(construct_phyloseq(alpha_ileum_asv_tab,
                                         alpha_ileum_taxa_tab,
                                         alpha_ileum_metadata))

ileum_mpse %<>% mp_rrarefy(raresize = 10000,seed = 123)

# Calculate alpha diversity - rarefied counts
ileum_mpse %<>% mp_cal_alpha(.abundance=RareAbundance, force=TRUE)
alpha_div_plots <- list()

# preparing data frame
alpha_data <- data.frame(SampleID=ileum_mpse$Sample.x,
                         Observe=ileum_mpse$Observe,
                         Shannon=ileum_mpse$Shannon,
                         Simpson=ileum_mpse$Simpson,
                         Pielou=ileum_mpse$Pielou,
                         Group=ileum_mpse$Group,
                         Country=ileum_mpse$Country,
                         Patient=ileum_mpse$Patient)

write.csv(alpha_data,file.path(path,paste0("alpha_indices_",segment,".csv")),
          row.names = FALSE)
alpha_data <- read.csv(file.path(path,paste0("alpha_indices_",segment,".csv")))

2.2.1 Plots

p_boxplot_alpha <- alpha_diversity_countries(alpha_data,
                                             show_legend = FALSE,
                                             size=1,gap=FALSE)
Using SampleID, Group, Country, Patient as id variables
Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
ℹ Please use the `linewidth` argument instead.
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
Using SampleID, Group, Country, Patient as id variables
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
# save the results
alpha_div_plots[[paste(segment,"Country")]] <- p_boxplot_alpha

# see the results
p_boxplot_alpha

2.2.2 Linear Model

path = "../../../results/merged_sites/main_analysis/Q3/alpha_diversity"
alpha_data <- read.csv(file.path(path,paste0("alpha_indices_",segment,".csv")))

Richness

results_model <- pairwise.lm(formula = "Observe ~ Group * Country",
                             factors=alpha_data$Group,
                             data=alpha_data)

# check interaction
if (!is.data.frame(results_model)){
  results_model_observe <- results_model[[1]]
  results_model_observe_emeans <- results_model[[2]]
} else {
  results_model_observe <- results_model
  results_model_observe_emeans <- NA
}

# save the results
pc_observed <- list(); 
pc_observed[[segment]] <- results_model_observe
# see the results
knitr::kable(results_model_observe,digits = 3,
caption = "Raw results of linear model of richness estimation.")
Raw results of linear model of richness estimation.
Estimate Std. Error t value Pr(>|t|)
no_ibd vs Groupibd -41.186 24.041 -1.713 0.090
no_ibd , ibd - CZ vs NO -25.636 27.443 -0.934 0.352
no_ibd vs Groupibd:CountryNO 34.572 29.532 1.171 0.245
knitr::kable(results_model_observe_emeans,digits = 3,
caption = "Raw results of independent country analysis")

Table: Raw results of independent country analysis

Shannon

results_model <- pairwise.lm(formula = "Shannon ~ Group * Country",
                             factors=alpha_data$Group,
                             data=alpha_data)

# check interaction
if (!is.data.frame(results_model)){
  results_model_shannon <- results_model[[1]]
  results_model_shannon_emeans <- results_model[[2]]
} else {
  results_model_shannon <- results_model
  results_model_shannon_emeans <- NA
}

# save the results
pc_shannon <- list(); 
pc_shannon[[segment]] <- as.data.frame(results_model_shannon)
# see the results
knitr::kable(results_model_shannon,digits = 3,
caption = "Raw results of linear model of Shannon estimation.")
Raw results of linear model of Shannon estimation.
Estimate Std. Error t value Pr(>|t|)
no_ibd vs Groupibd -0.438 0.325 -1.349 0.180
no_ibd , ibd - CZ vs NO -0.453 0.371 -1.221 0.225
no_ibd vs Groupibd:CountryNO 0.471 0.399 1.180 0.241
knitr::kable(results_model_shannon_emeans,digits = 3,
caption = "Raw results of independent country analysis")

Table: Raw results of independent country analysis

Simpson

results_model <- pairwise.lm(formula = "Simpson ~ Group * Country",
                                     factors=alpha_data$Group,
                                     data=alpha_data)

# check interaction
if (!is.data.frame(results_model)){
  results_model_simpson <- results_model[[1]]
  results_model_simpson_emeans <- results_model[[2]]
} else {
  results_model_simpson <- results_model
  results_model_simpson_emeans <- NA
}


# save the results
pc_simpson <- list(); 
pc_simpson[[segment]] <- as.data.frame(results_model_simpson)
# see the results
knitr::kable(results_model_simpson,digits = 3,
caption = "Raw results of linear model of Simpson estimation.")
Raw results of linear model of Simpson estimation.
Estimate Std. Error t value Pr(>|t|)
no_ibd vs Groupibd -0.043 0.043 -1.004 0.318
no_ibd , ibd - CZ vs NO -0.047 0.049 -0.974 0.332
no_ibd vs Groupibd:CountryNO 0.045 0.052 0.865 0.389
knitr::kable(results_model_simpson_emeans,digits = 3,
caption = "Raw results of independent country analysis")

Table: Raw results of independent country analysis

Pielou

results_model <- pairwise.lm(formula = "Pielou ~ Group * Country",
                                     factors=alpha_data$Group,
                                     data=alpha_data)

# check interaction

if (!is.data.frame(results_model)){
  results_model_pielou <- results_model[[1]]
  results_model_pielou_emeans <- results_model[[2]]
} else {
  results_model_pielou <- results_model
  results_model_pielou_emeans <- NA
}

# save the results
pc_pielou <- list(); 
pc_pielou[[segment]] <- as.data.frame(results_model_pielou)
# see the results
knitr::kable(results_model_pielou,digits = 3,
caption = "Raw results of linear model of Pielou estimation.")
Raw results of linear model of Pielou estimation.
Estimate Std. Error t value Pr(>|t|)
no_ibd vs Groupibd -0.042 0.046 -0.910 0.365
no_ibd , ibd - CZ vs NO -0.068 0.052 -1.305 0.195
no_ibd vs Groupibd:CountryNO 0.056 0.056 1.007 0.317
knitr::kable(results_model_pielou_emeans,digits = 3,
caption = "Raw results of independent country analysis")

Table: Raw results of independent country analysis

2.2.3 Saving results

alpha_list <- list(
  Richness=pc_observed[[segment]],
  Shannon=pc_shannon[[segment]])
                   
write.xlsx(alpha_list, 
           file = file.path(path,paste0("alpha_diversity_results_",segment,".xlsx")))

2.3 Beta diversity

Calculating Aitchison distance (euclidean distance on clr-transformed data), both at ASV and genus level.

2.3.1 Main analysis

Genus level, Aitchison distance

level="genus"
path = "../../../results/merged_sites/main_analysis/Q3/beta_diversity"
pairwise_aitchison_raw <- list()
pca_plots_list <- list()

Aggregation, filtering

# Aggregation
genus_data <- aggregate_taxa(ileum_asv_tab,
                             ileum_taxa_tab,
                             taxonomic_level=level,
                             names=TRUE)
# Filtration
filt_data <- filtering_steps(genus_data[[1]],
                             genus_data[[2]],
                             ileum_metadata,
                             seq_depth_threshold=10000)
Removing 6 ASV(s)
filt_ileum_genus_tab <- filt_data[[1]]
filt_ileum_genus_taxa <- filt_data[[2]]
filt_ileum_metadata <- filt_data[[3]]
2.3.1.0.1 PERMANOVA
pairwise_df <- filt_ileum_genus_tab %>% column_to_rownames("SeqID") %>% t()

# main effect
pp_main <- pairwise.adonis(pairwise_df,
                           filt_ileum_metadata$Group,
                           covariate = filt_ileum_metadata$Country, sim.method = "robust.aitchison", p.adjust.m="BH")

# interaction
pp_int <- pairwise.adonis(pairwise_df,filt_ileum_metadata$Group,covariate = filt_ileum_metadata$Country, interaction = TRUE, sim.method = "robust.aitchison", p.adjust.m="BH")

# tidy the results
pp_factor <- pp_main[[1]]
pp_cov <- pp_main[[2]]
pp_fac.cov <- pp_int[[3]]

cols <- c("pairs","Df","SumsOfSqs", "F.Model","R2","p.value", "p.adjusted", "sig")
colnames(pp_factor) <- cols; colnames(pp_cov) <- cols; colnames(pp_fac.cov) <- cols; 

# save raw results
pairwise_aitchison_raw[[paste(level, segment)]] <- rbind(pp_factor,pp_cov,pp_fac.cov)
# see the results
knitr::kable(pp_factor,digits = 3,caption = "PERMANOVA, GROUP separation")
PERMANOVA, GROUP separation
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
no_ibd vs ibd 1 204.897 1.148 0.011 0.175 0.175
knitr::kable(pp_cov,digits = 3,caption = "PERMANOVA, COUNTRY separation")
PERMANOVA, COUNTRY separation
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
no_ibd vs ibd , Country 1 562.985 3.155 0.03 0.001 0.001 ***
knitr::kable(pp_fac.cov,digits = 3,caption = "PERMANOVA, INTERACTION GROUP:Country")
PERMANOVA, INTERACTION GROUP:Country
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
no_ibd vs ibd : Country 1 168.942 0.946 0.009 0.564 0.564

Interaction check

interaction_sig <- pp_fac.cov$pairs[pp_fac.cov$p.adjusted < 0.1]

if (length(interaction_sig)>0){
  for (i in 1:length(interaction_sig)){
  group1 <- unlist(strsplit(interaction_sig[i],split = " vs "))[1]
  group2 <- unlist(strsplit(interaction_sig[i],split = " vs "))[2]
  group2 <- unlist(strsplit(group2,split = " : "))[1]
  
  result_list <- adonis_postanalysis(x=pairwise_df,
                      factors = filt_ileum_metadata$Group,
                      covariate = filt_ileum_metadata$Country, 
                      group1 = group1,
                      group2 = group2)
  print(result_list)
}
}
2.3.1.0.2 Plots

PCA

p <- pca_plot_custom(filt_ileum_genus_tab,
                                 filt_ileum_genus_taxa,
                                 filt_ileum_metadata,
                                 show_boxplots = TRUE,
                                 variable = "Group", size=3, show_legend=FALSE)

# save the results
pca_plots_list[[paste(segment,level,"custom")]] <- p

# see the results
p

2.3.1.1 Saving results

write.xlsx(pairwise_aitchison_raw[[paste(level, segment)]], 
           file = file.path(path,
           paste0("beta_diversity_results_", segment,".xlsx")))

2.4 Univariate Analysis

2.4.1 Main analysis

level="genus"
# needed paths
path = "../../../results/merged_sites/main_analysis/Q3/univariate_analysis"
path_maaslin=file.path("../../../intermediate_files/maaslin/Q3",level)
# variables
raw_linda_results_genus <- list();
raw_linda_results_genus[[segment]] <- list()
linda_results_genus <- list(); 
linda_results_genus[[segment]] <- list()

# country and interaction problems
list_intersections <- list()
list_venns <- list()
uni_statistics <- list()

# workbook for final df
wb <- createWorkbook()

# PSC effect
psc_effect <- list()

Aggregate taxa

genus_data <- aggregate_taxa(ileum_asv_tab,
                             ileum_taxa_tab,
                             taxonomic_level = level)

ileum_genus_tab <- genus_data[[1]]
ileum_genus_taxa_tab <- genus_data[[2]]

ileum_genus_asv_taxa_tab <- create_asv_taxa_table(ileum_genus_tab,
                                                  ileum_genus_taxa_tab)

2.4.1.1 ibd vs no_ibd

group <- c("no_ibd","ibd")
comparison_name <- paste0(group[1], " vs ",group[2])
2.4.1.1.1 linDA
# prepare the data
linda_data <- binomial_prep(ileum_genus_tab,
                            ileum_genus_taxa_tab,
                            ileum_metadata,
                            group, usage="linDA")
Removing 6 ASV(s)
filt_ileum_uni_data <- linda_data[[1]]
filt_ileum_uni_taxa <- linda_data[[2]]
filt_ileum_uni_metadata <- linda_data[[3]]

# fit the model
linda.obj <- linda(filt_ileum_uni_data, 
                   filt_ileum_uni_metadata, 
                   formula = '~ Group * Country')
0  features are filtered!
The filtered data has  103  samples and  236  features will be tested!
Warning in linda(filt_ileum_uni_data, filt_ileum_uni_metadata, formula = "~ Group * Country"): Some features have less than 3 nonzero values! 
                        They have virtually no statistical power. You may consider filtering them in the analysis!
Imputation approach is used.
Fit linear models ...
Completed.
linda.output <- linda.obj$output
linda.output <- linda_renaming(linda.output, group)

# save the results
group1 <- paste0(group[1], " vs ","Group",group[2])
group2 <- paste0(group[1], " , ",group[2], " - CZ vs NO") 
group3 <- paste0(group[1], " vs ","Group",group[2], ":CountryNO")

for (grp in c(group1,group2,group3)){
  raw_linda_results_genus[[segment]][[grp]] <- 
    rawlinda.df(linda.output,
                grp,
                filt_ileum_uni_data,
                filt_ileum_uni_taxa)
  
  linda_results_genus[[segment]][[grp]] <- 
    linda.df(linda.output,
             grp,
             filt_ileum_uni_data,
             filt_ileum_uni_taxa)
}
# volcano plot
volcano_1 <- volcano_plot_linda(linda.output, 
                                group1, 
                                taxa_table = filt_ileum_uni_taxa) + 
            ggtitle(comparison_title)

volcano_2  <- volcano_plot_linda(linda.output, group2, 
                                taxa_table = filt_ileum_uni_taxa) + 
            ggtitle("NO vs CZ")

volcano_3 <- volcano_plot_linda(linda.output, group3, 
                                taxa_table = filt_ileum_uni_taxa) +
            ggtitle("Interaction effect")

volcano <- ggarrange(volcano_1,volcano_2,volcano_3, ncol=3)
volcano

volcano_linda_ileum <- volcano_1 + 
  ggtitle("IBD vs no_IBD") + 
  xlab("Effect size (LinDA)") + 
  ylab("–log"[10]~"(q-value)")
  
volcano_linda_ileum

2.4.1.1.2 MaAsLin2
volcano1 <- volcano_plot_maaslin(fit_data,filt_ileum_uni_taxa) + 
            ggtitle(comparison_title)

volcano2 <- volcano_plot_maaslin(fit_data,filt_ileum_uni_taxa,variable="Country") + 
            ggtitle("Country effect")

volcano <- ggarrange(volcano1,volcano2, ncol=2)
volcano

volcano_maaslin_ileum <- volcano1 + 
  ggtitle("IBD vs no_IBD") + 
  xlab("Effect size (MaAsLin2)") + 
  ylab("–log"[10]~"(q-value)")
  
volcano_maaslin_ileum

2.4.1.1.3 Group - Intersection
intersection_results <- group_intersection(group, list_intersections, list_venns,
                                           linda.output, fit_data,
                                           raw_linda_results_genus, 
                                           segment = segment,
                                           level=level)

list_intersections <- intersection_results[[1]]
list_venns <- intersection_results[[2]]
venn <- intersection_results[[3]]

# show the results
venn

2.4.1.1.4 Interaction effect
list_interaction_significant <- country_interaction(group,
                                                    linda.output, 
                                                    list_intersections,
                                          filt_ileum_uni_data,
                                          filt_ileum_uni_metadata,
                                          segment=segment,
                                          level=level)

# see the result
## significant interaction effect
list_interaction_significant[[1]]
NULL
## results for czech cohort
list_interaction_significant[[2]]
[1] NA
## results for norwegian cohort
list_interaction_significant[[3]]
[1] NA

Removing problematic taxa

list_intersections <- removing_interaction_problems(group,
                                                    list_interaction_significant,
                                                    list_intersections,
                                                    segment=segment,
                                                    level=level)
2.4.1.1.5 Basic statistics
uni_df <-  merge(basic_univariate_statistics(linda_data,group),
                 raw_linda_results_genus[[segment]][[group1]],
                 by="SeqID",all=TRUE)
[1] "no_ibd"
[1] "ibd"
[1] "no_ibd"
[1] "ibd"
uni_df[["final_sig"]] <- uni_df$SeqID %in% list_intersections[[paste(segment,level,comparison_name)]][["SeqID"]]
uni_statistics[[segment]][[paste(level,comparison_name)]] <- uni_df

# for comparison
new_name <- comparison_name
addWorksheet(wb, sheetName = new_name)
writeData(wb, sheet = new_name, uni_df, rowNames=FALSE)

2.4.1.2 Visualization

Heatmap visualizing the linDA’s logFoldChange for taxa with p < 0.1.

list_heatmap <- list_intersections[grep(paste(segment,level),
                                  names(list_intersections),value=TRUE)]

if (length((list_heatmap[[1]][[1]]))>1){
  p_heatmap_linda <- heatmap_linda(list_heatmap,ileum_taxa_tab)
  p_heatmap_linda
}

Dotheatmap

if (length((list_heatmap[[1]][[1]]))>0){
dotheatmap_linda <- dot_heatmap_linda(list_heatmap,
                                      uni_statistics$terminal_ileum[grepl(level,names(uni_statistics$terminal_ileum))],
                                      ileum_taxa_tab)
dotheatmap_linda

}

Horizontal bar plot

if (length((list_heatmap[[1]][[1]]))>0){
p_prevalence <- horizontal_barplot(wb,taxa=levels(dotheatmap_linda$data$SeqID))

p_prevalence_final <- ggarrange(p_prevalence,
                                ggplot() + theme_minimal(),
                                nrow = 2,heights = c(1,0.085))
p <- ggarrange(dotheatmap_linda + theme(legend.position = "none"),p_prevalence_final,ncol=2,widths = c(1,0.2))
p
}

2.4.1.3 Saving results

# ALL DATA
saveWorkbook(wb,file.path(path,paste0("uni_analysis_wb_",segment,".xlsx")),
             overwrite = TRUE)

# PSC effect
write.xlsx(psc_effect[[paste(segment,level)]],file.path(path,paste0("psc_effect_",segment,".xlsx")))

# SIGNIFICANT taxa

write.xlsx(list_intersections[grepl(segment,names(list_intersections))] %>%
            `names<-`(gsub(segment, "", names(
              list_intersections[grepl(segment,names(list_intersections))]))),
           file.path(path,paste0("significant_taxa_",segment,".xlsx")))

2.5 Results overview

2.5.0.1 Alpha diversity

knitr::kable(pc_observed[[segment]],
             digits = 3,
             caption = "Results of linear model testing ASV Richness")
Results of linear model testing ASV Richness
Estimate Std. Error t value Pr(>|t|)
no_ibd vs Groupibd -41.186 24.041 -1.713 0.090
no_ibd , ibd - CZ vs NO -25.636 27.443 -0.934 0.352
no_ibd vs Groupibd:CountryNO 34.572 29.532 1.171 0.245
knitr::kable(pc_shannon[[segment]],
             digits = 3,
             caption = "Results of linear model testing Shannon index")
Results of linear model testing Shannon index
Estimate Std. Error t value Pr(>|t|)
no_ibd vs Groupibd -0.438 0.325 -1.349 0.180
no_ibd , ibd - CZ vs NO -0.453 0.371 -1.221 0.225
no_ibd vs Groupibd:CountryNO 0.471 0.399 1.180 0.241
knitr::kable(pc_simpson[[segment]],
             digits = 3,
             caption = "Results of linear model testing Simpson index")
Results of linear model testing Simpson index
Estimate Std. Error t value Pr(>|t|)
no_ibd vs Groupibd -0.043 0.043 -1.004 0.318
no_ibd , ibd - CZ vs NO -0.047 0.049 -0.974 0.332
no_ibd vs Groupibd:CountryNO 0.045 0.052 0.865 0.389
knitr::kable(pc_pielou[[segment]],
             digits = 3,
             caption = "Results of linear model testing Pielou index")
Results of linear model testing Pielou index
Estimate Std. Error t value Pr(>|t|)
no_ibd vs Groupibd -0.042 0.046 -0.910 0.365
no_ibd , ibd - CZ vs NO -0.068 0.052 -1.305 0.195
no_ibd vs Groupibd:CountryNO 0.056 0.056 1.007 0.317

Plots

alpha_div_plots[[paste(segment,"Country")]]

2.5.0.2 Beta diversity

Main results

knitr::kable(pairwise_aitchison_raw[[paste("genus", segment)]],
             digits = 3,
             caption = "Results of PERMANOVA - robust aitchison distance")
Results of PERMANOVA - robust aitchison distance
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
no_ibd vs ibd 1 204.897 1.148 0.011 0.175 0.175
no_ibd vs ibd , Country 1 562.985 3.155 0.030 0.001 0.001 ***
no_ibd vs ibd : Country 1 168.942 0.946 0.009 0.564 0.564

PCA

pca_plots_list[[paste(segment,"genus custom")]]

2.5.0.3 Univariate analysis

Number of significant taxa

knitr::kable(as.data.frame(lapply(list_intersections,nrow)) %>% t() %>% 
  `colnames<-`("Count") %>% 
  `rownames<-`(c(names(list_intersections))),caption="Number of significant taxa")
Number of significant taxa
Count
terminal_ileum genus no_ibd vs ibd 0

3 Data Analysis - Colon

segment="colon"

3.1 Filtering

Rules: - prevalence > 5% (per group) - nearZeroVar with default settings - sequencing depth > 5000 - taxonomic assignment at least order

Library size

read_counts(colon_asv_tab, line = c(5000,10000))
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.

3.1.1 Sequencing depth

data_filt <- seq_depth_filtering(colon_asv_tab,
                                 colon_taxa_tab,
                                 colon_metadata,
                                 seq_depth_threshold = 10000)
Removing 43 ASV(s)
filt_colon_asv_tab <- data_filt[[1]]; alpha_colon_asv_tab <- filt_colon_asv_tab
filt_colon_taxa_tab <- data_filt[[2]]; alpha_colon_taxa_tab <- filt_colon_taxa_tab
filt_colon_metadata <- data_filt[[3]]; alpha_colon_metadata <- filt_colon_metadata

seq_step <- dim(filt_colon_asv_tab)[1]

Library size

read_counts(filt_colon_asv_tab,line = c(10000))
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.

3.1.2 NearZeroVar

data_filt <- nearzerovar_filtering(filt_colon_asv_tab,
                                   filt_colon_taxa_tab,
                                   filt_colon_metadata)

filt_colon_asv_tab <- data_filt[[1]]
filt_colon_taxa_tab <- data_filt[[2]]
nearzero_step <- dim(filt_colon_asv_tab)[1]

Library size

read_counts(filt_colon_asv_tab,line = c(5000,10000))
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.

Check zero depth

data_filt <- check_zero_depth(filt_colon_asv_tab, 
                              filt_colon_taxa_tab, 
                              filt_colon_metadata)

filt_colon_asv_tab <- data_filt[[1]]; 
filt_colon_taxa_tab <- data_filt[[2]]; 
filt_colon_metadata <- data_filt[[3]]; 

Library size

read_counts(filt_colon_asv_tab,line = c(5000,10000))
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.

3.1.3 Final Counts

final_counts_filtering(colon_asv_tab,
                       filt_colon_asv_tab,
                       filt_colon_metadata,
                       seq_step, 0, nearzero_step)
                                               V1
Raw data: ASVs                               4084
Raw data: Samples                             350
Sequencing depth filt: ASVs                  4041
Prevalence filt: ASVs                           0
NearZeroVar filt: ASVs                        354
Filt data: ASVs                               354
Filt data: Samples                            337
Filt data: Patients                           143
Filt data: Patients.1                           0
Filtered samples                               13
Matrices                    CA;CD;SI;Cecum;Rectum
healthy                                         0
non-rPSC                                        0
rPSC                                            0
pre_ltx                                         0
post_ltx                                        0
ETOH                                            0

3.2 Alpha diversity

path = "../../../results/merged_sites/main_analysis/Q3/alpha_diversity"

Calculation

# Construct MPSE object
alpha_colon_metadata$Sample <- alpha_colon_metadata$SampleID
colon_mpse <- as.MPSE(construct_phyloseq(alpha_colon_asv_tab,
                                         alpha_colon_taxa_tab,
                                         alpha_colon_metadata))

colon_mpse %<>% mp_rrarefy(raresize = 10000,seed = 123)

# Calculate alpha diversity - rarefied counts
colon_mpse %<>% mp_cal_alpha(.abundance=RareAbundance, force=TRUE)
alpha_data <- data.frame(SampleID=colon_mpse$Sample.x,
                         Observe=colon_mpse$Observe,
                         Shannon=colon_mpse$Shannon,
                         Simpson=colon_mpse$Simpson,
                         Pielou=colon_mpse$Pielou,
                         Group=colon_mpse$Group,
                         Country=colon_mpse$Country,
                         Patient=colon_mpse$Patient)

write.csv(alpha_data,file.path(path,paste0("alpha_indices_",segment,".csv")),
          row.names = FALSE)

3.2.1 Plots

p_boxplot_alpha <- alpha_diversity_countries(alpha_data,
                                             show_legend = FALSE,
                                             size=1,gap=FALSE)
Using SampleID, Group, Country, Patient as id variables
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
Using SampleID, Group, Country, Patient as id variables

Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
# save the results
alpha_div_plots[[paste(segment,"Country")]] <- p_boxplot_alpha

# see the results
p_boxplot_alpha

3.2.2 Linear Model

path = "../../../results/merged_sites/main_analysis/Q3/alpha_diversity"
alpha_data <- read.csv(file.path(path,paste0("alpha_indices_",segment,".csv")))

Richness

results_model <- pairwise.lmer(
  formula = "Observe ~ Group * Country + (1|Patient)",
  factors=alpha_data$Group,
  data=alpha_data)

# check interaction
if (!is.data.frame(results_model)){
  results_model_observe <- results_model[[1]]
  results_model_observe_detailed <- results_model[[2]]
} else {
  results_model_observe <- results_model
  results_model_observe_detailed <- NA
}

# save the results
pc_observed[[segment]] <- results_model_observe
# see the results
knitr::kable(results_model_observe,digits = 3,
caption = "Raw results of linear model of richness estimation.")
Raw results of linear model of richness estimation.
Estimate Std..Error df t.value Pr…t.. p.adj sig
no_ibd vs Groupibd 0.195 13.828 330.245 0.014 0.989 0.989
no_ibd vs ibd - CZ vs NO -0.556 18.199 238.733 -0.031 0.976 0.976
no_ibd vs Groupibd:CountryNO -1.880 18.971 263.174 -0.099 0.921 0.921
knitr::kable(results_model_observe_detailed,digits = 3,
caption = "Raw results of independent country analysis")

Table: Raw results of independent country analysis

Shannon

results_model <- pairwise.lmer(
  formula = "Shannon ~ Group * Country + (1|Patient)",
  factors=alpha_data$Group,
  data=alpha_data)

# check interaction
if (!is.data.frame(results_model)){
  results_model_shannon <- results_model[[1]]
  results_model_shannon_detailed <- results_model[[2]]
} else {
  results_model_shannon <- results_model
  results_model_shannon_detailed <- NA
}

# save the results
pc_shannon[[segment]] <- as.data.frame(results_model_shannon)
# see the results
knitr::kable(results_model_shannon,digits = 3,
caption = "Raw results of linear model of Shannon estimation.")
Raw results of linear model of Shannon estimation.
Estimate Std..Error df t.value Pr…t.. p.adj sig
no_ibd vs Groupibd -0.038 0.204 332.621 -0.185 0.854 0.854
no_ibd vs ibd - CZ vs NO -0.139 0.262 239.592 -0.529 0.597 0.597
no_ibd vs Groupibd:CountryNO 0.101 0.275 262.547 0.367 0.714 0.714
knitr::kable(results_model_shannon_detailed,digits = 3,
caption = "Raw results of independent country analysis")

Table: Raw results of independent country analysis

Simpson

results_model <- pairwise.lmer(
  formula = "Simpson ~ Group * Country + (1|Patient)",
  factors=alpha_data$Group,
  data=alpha_data)

# check interaction
if (!is.data.frame(results_model)){
  results_model_simpson <- results_model[[1]]
  results_model_simpson_detailed <- results_model[[2]]
} else {
  results_model_simpson <- results_model
  results_model_simpson_detailed <- NA
}

# save the results
pc_simpson[[segment]] <- as.data.frame(results_model_simpson)
# see the results
knitr::kable(results_model_simpson,digits = 3,
caption = "Raw results of linear model of Simpson estimation.")
Raw results of linear model of Simpson estimation.
Estimate Std..Error df t.value Pr…t.. p.adj sig
no_ibd vs Groupibd -0.015 0.033 332.974 -0.447 0.656 0.656
no_ibd vs ibd - CZ vs NO -0.036 0.042 240.107 -0.860 0.391 0.391
no_ibd vs Groupibd:CountryNO 0.035 0.044 261.852 0.797 0.426 0.426
knitr::kable(results_model_simpson_detailed,digits = 3,
caption = "Raw results of independent country analysis")

Table: Raw results of independent country analysis

3.2.3 Saving results

alpha_list <- list(
  Richness=pc_observed[[segment]] %>% rownames_to_column("Comparison"),
  Shannon=pc_shannon[[segment]] %>% rownames_to_column("Comparison"),
  Simpson=pc_simpson[[segment]] %>% rownames_to_column("Comparison"))
                   
write.xlsx(alpha_list, 
           file = file.path(path,paste0("alpha_diversity_results_",segment,".xlsx")))

3.3 Beta diversity

Calculating Aitchison distance (euclidean distance on clr-transformed data), both at ASV and genus level.

3.3.1 Main analysis - Genus, Aitchison

Genus level, Aitchison distance

level="genus"
path = "../../../results/merged_sites/main_analysis/Q3/beta_diversity"

Aggregation, filtering

genus_data <- aggregate_taxa(colon_asv_tab,
                             colon_taxa_tab,
                             taxonomic_level=level,
                             names=TRUE)

filt_data <- filtering_steps(genus_data[[1]],
                             genus_data[[2]],
                             colon_metadata,
                            seq_depth_threshold=10000)
Removing 7 ASV(s)
filt_colon_genus_tab <- filt_data[[1]]
filt_colon_genus_taxa <- filt_data[[2]]
filt_colon_genus_metadata <- filt_data[[3]]
3.3.1.0.1 PERMANOVA
pairwise_df <- filt_colon_genus_tab %>% column_to_rownames("SeqID") %>% t()

# main effect
pp_main <- pairwise.adonis(pairwise_df,filt_colon_genus_metadata$Group,
                           covariate = filt_colon_genus_metadata$Country, 
                           patients = filt_colon_genus_metadata$Patient,
                           sim.method = "robust.aitchison", p.adjust.m="BH")

# interaction
pp_int <- pairwise.adonis(pairwise_df,filt_colon_genus_metadata$Group,
                          covariate = filt_colon_genus_metadata$Country, 
                          interaction = TRUE, 
                          patients = filt_colon_genus_metadata$Patient,
                          sim.method = "robust.aitchison", p.adjust.m="BH")

# tidy the results
pp_factor <- pp_main[[1]]
pp_cov <- pp_main[[2]]
pp_fac.cov <- pp_int[[3]]

cols <- c("pairs","Df","SumsOfSqs", "F.Model","R2","p.value", "p.adjusted", "sig")
colnames(pp_factor) <- cols; colnames(pp_cov) <- cols; colnames(pp_fac.cov) <- cols; 

# save raw results
pairwise_aitchison_raw[[paste(level, segment)]] <-rbind(pp_factor,pp_cov,pp_fac.cov)
# see the results
knitr::kable(pp_factor,digits = 3,caption = "PERMANOVA, GROUP separation")
PERMANOVA, GROUP separation
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
no_ibd vs ibd 1 170.678 1.097 0.003 1 1
knitr::kable(pp_cov,digits = 3,caption = "PERMANOVA, COUNTRY separation")
PERMANOVA, COUNTRY separation
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
no_ibd vs ibd , Country 1 396.369 2.547 0.008 0.169 0.169
knitr::kable(pp_fac.cov,digits = 3,caption = "PERMANOVA, INTERACTION GROUP:Country")
PERMANOVA, INTERACTION GROUP:Country
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
no_ibd vs ibd : Country 1 133.144 0.855 0.003 0.999 0.999

Interaction check

interaction_sig <- pp_fac.cov$pairs[pp_fac.cov$p.adjusted < 0.1]

if (length(interaction_sig)>0){
 for (i in 1:length(interaction_sig)){
  group1 <- unlist(strsplit(interaction_sig[i],split = " vs "))[1]
  group2 <- unlist(strsplit(interaction_sig[i],split = " vs "))[2]
  group2 <- unlist(strsplit(group2,split = " : "))[1]
  
  result_list <- adonis_postanalysis(x=pairwise_df,
                      factors = filt_colon_genus_metadata$Group,
                      covariate = filt_colon_genus_metadata$Country, 
                      group1 = group1,
                      group2 = group2,
                      patients = filt_colon_genus_metadata$Patient)
  print(result_list)
} 
}
3.3.1.0.2 Plots

PCoA custom

p <- pca_plot_custom(filt_colon_genus_tab,
                                 filt_colon_genus_taxa,
                                 filt_colon_genus_metadata,
                                 show_boxplots = TRUE,
                                 variable = "Group", size=2, 
                                 show_legend=FALSE)

# save the results
pca_plots_list[[paste(segment,level,"custom")]] <- p

# see the results
p

3.3.1.1 Saving results

write.xlsx(pairwise_aitchison_raw[[paste(level, segment)]], 
           file = file.path(path,
           paste0("beta_diversity_results_", segment,".xlsx")))

3.4 Univariate Analysis

3.4.1 Main - Genus level

level="genus"
# needed paths
path = "../../../results/merged_sites/main_analysis/Q3/univariate_analysis"
path_maaslin=file.path("../../../intermediate_files/maaslin/Q3",level)
# variables
raw_linda_results_genus[[segment]] <- list()
linda_results_genus[[segment]] <- list()

# country and interaction problems
list_intersections <- list()
list_venns <- list()
uni_statistics <- list()

# workbook for final df
wb <- createWorkbook()

# PSC effect
psc_effect <- list()

3.4.1.1 Genus level

level="genus"

Aggregate taxa

genus_data <- aggregate_taxa(colon_asv_tab,
                             colon_taxa_tab,
                             taxonomic_level = level)

colon_genus_tab <- genus_data[[1]]
colon_genus_taxa_tab <- genus_data[[2]]

colon_genus_asv_taxa_tab <- create_asv_taxa_table(colon_genus_tab,
                                                  colon_genus_taxa_tab)
3.4.1.1.1 ibd vs no_ibd
group <- c("no_ibd","ibd")
comparison_name <- paste0(group[1], " vs ",group[2])
3.4.1.1.1.1 linda
# prepare the data
linda_data <- binomial_prep(colon_genus_tab,
                            colon_genus_taxa_tab,
                            colon_metadata,
                            group, usage="linDA")
Removing 7 ASV(s)
filt_colon_uni_data <- linda_data[[1]]
filt_colon_uni_taxa <- linda_data[[2]]
filt_colon_uni_metadata <- linda_data[[3]]

# fit the model
linda.obj <- linda(filt_colon_uni_data,
                   filt_colon_uni_metadata,
                   formula = '~ Group * Country + (1|Patient)')
0  features are filtered!
The filtered data has  337  samples and  159  features will be tested!
Imputation approach is used.
Fit linear mixed effects models ...
Completed.
linda.output <- linda.obj$output
linda.output <- linda_renaming(linda.output, group)

# save the results
group1 <- paste0(group[1], " vs ","Group",group[2])
group2 <- paste0(group[1], " , ",group[2], " - CZ vs NO") 
group3 <- paste0(group[1], " vs ","Group",group[2], ":CountryNO")


for (grp in c(group1,group2,group3)){
  raw_linda_results_genus[[segment]][[grp]] <- 
    rawlinda.df(linda.output,
                grp,
                filt_colon_uni_data,
                filt_colon_uni_taxa)
  
  linda_results_genus[[segment]][[grp]] <- 
    linda.df(linda.output,
             grp,
             filt_colon_uni_data,
             filt_colon_uni_taxa)
}
# volcano plot
volcano_1 <- volcano_plot_linda(linda.output, group1, 
                                taxa_table = filt_colon_uni_taxa) + 
            ggtitle(paste(group,collapse=" vs "))

volcano_2  <- volcano_plot_linda(linda.output, group2, 
                                taxa_table = filt_colon_uni_taxa) + 
            ggtitle("NO vs CZ")

volcano_3 <- volcano_plot_linda(linda.output, group3, 
                                taxa_table = filt_colon_uni_taxa) +
            ggtitle("Interaction effect")

volcano <- ggarrange(volcano_1,volcano_2,volcano_3, ncol=3)

# see the plot
volcano

volcano_linda_colon <- volcano_1 + 
  ggtitle("IBD vs no_IBD") + 
  xlab("Effect size (LinDA)") + 
  ylab("–log"[10]~"(q-value)")
  
volcano_linda_ileum

3.4.1.1.1.2 MaAsLin2
volcano1 <- volcano_plot_maaslin(fit_data,filt_colon_uni_taxa) + 
            ggtitle(paste(group[1], "vs", group[2]))

volcano2 <- volcano_plot_maaslin(fit_data,filt_colon_uni_taxa,variable="Country") + 
            ggtitle("Country effect")

volcano <- ggarrange(volcano1,volcano2, ncol=2)
volcano

volcano_maaslin_colon <- volcano1 + 
  ggtitle("IBD vs no_IBD") + 
  xlab("Effect size (MaAsLin2)") + 
  ylab("–log"[10]~"(q-value)")
  
volcano_maaslin_colon

3.4.1.1.1.3 Group - Intersection
intersection_results <- group_intersection(group, list_intersections, list_venns,
                                           linda.output, fit_data,
                                           raw_linda_results_genus, 
                                           segment = segment,
                                           level=level)

list_intersections <- intersection_results[[1]]
list_venns <- intersection_results[[2]]
venn <- intersection_results[[3]]

# show the results
venn

3.4.1.1.1.4 Interaction effect
list_interaction_significant <- country_interaction(group,
                                                    linda.output, 
                                                    list_intersections,
                                          filt_colon_uni_data,
                                          filt_colon_uni_metadata,
                                          segment=segment,
                                          level=level)

# see the result
## significant interaction effect
list_interaction_significant[[1]]
NULL
## results for czech cohort
list_interaction_significant[[2]]
[1] NA
## results for norwegian cohort
list_interaction_significant[[3]]
[1] NA

Removing problematic taxa

list_intersections <- removing_interaction_problems(group,
                                                    list_interaction_significant,
                                                    list_intersections,
                                                    segment=segment,
                                                    level=level)
3.4.1.1.2 Basic statistics
uni_df <-  merge(basic_univariate_statistics(linda_data,group),
                 raw_linda_results_genus[[segment]][[group1]],
                 by="SeqID",all=TRUE)
[1] "no_ibd"
[1] "ibd"
[1] "no_ibd"
[1] "ibd"
uni_df[["final_sig"]] <- uni_df$SeqID %in% list_intersections[[paste(segment,level,comparison_name)]][["SeqID"]]
uni_statistics[[segment]][[paste(level,comparison_name)]] <- uni_df

# for comparison
new_name <- comparison_name
addWorksheet(wb, sheetName = new_name)
writeData(wb, sheet = new_name, uni_df, rowNames=FALSE)
3.4.1.1.3 Visualization

Heatmap visualizing the linDA’s logFoldChange for taxa with p < 0.1.

list_heatmap <- list_intersections[grep(paste(segment,level),
                                  names(list_intersections),value=TRUE)]
if (length((list_heatmap[[1]][[1]]))>1){
p_heatmap_linda <- heatmap_linda(list_heatmap,colon_taxa_tab)
p_heatmap_linda
}

Dot heatmap

if (length((list_heatmap[[1]][[1]]))>0){
dotheatmap_linda <- dot_heatmap_linda(list_heatmap,
                                      uni_statistics$terminal_ileum[grepl(level,names(uni_statistics$terminal_ileum))],
                                      colon_taxa_tab)
dotheatmap_linda
}

Horizontal bar plot

if (length((list_heatmap[[1]][[1]]))>0){
p_prevalence <- horizontal_barplot(wb,taxa=levels(dotheatmap_linda$data$SeqID))

p_prevalence_final <- ggarrange(p_prevalence,
                                ggplot() + theme_minimal(),
                                nrow = 2,heights = c(1,0.085))
p <- ggarrange(dotheatmap_linda + theme(legend.position = "none"),p_prevalence_final,ncol=2,widths = c(1,0.2))
p
}

3.4.1.2 Saving results

# ALL DATA
saveWorkbook(wb,file.path(path,paste0("uni_analysis_wb_",segment,".xlsx")),
             overwrite = TRUE)

# SIGNIFICANT taxa

write.xlsx(list_intersections[grepl(segment,names(list_intersections))] %>%
            `names<-`(gsub(segment, "", names(
              list_intersections[grepl(segment,names(list_intersections))]))),
           file.path(path,paste0("significant_taxa_",segment,".xlsx")))

3.5 Results overview

3.5.0.1 Alpha diversity

knitr::kable(pc_observed[[segment]],
             digits = 3,
             caption = "Results of linear model testing ASV Richness")
Results of linear model testing ASV Richness
Estimate Std..Error df t.value Pr…t.. p.adj sig
no_ibd vs Groupibd 0.195 13.828 330.245 0.014 0.989 0.989
no_ibd vs ibd - CZ vs NO -0.556 18.199 238.733 -0.031 0.976 0.976
no_ibd vs Groupibd:CountryNO -1.880 18.971 263.174 -0.099 0.921 0.921
knitr::kable(pc_shannon[[segment]],
             digits = 3,
             caption = "Results of linear model testing Shannon index")
Results of linear model testing Shannon index
Estimate Std..Error df t.value Pr…t.. p.adj sig
no_ibd vs Groupibd -0.038 0.204 332.621 -0.185 0.854 0.854
no_ibd vs ibd - CZ vs NO -0.139 0.262 239.592 -0.529 0.597 0.597
no_ibd vs Groupibd:CountryNO 0.101 0.275 262.547 0.367 0.714 0.714
knitr::kable(pc_simpson[[segment]],
             digits = 3,
             caption = "Results of linear model testing Simpson index")
Results of linear model testing Simpson index
Estimate Std..Error df t.value Pr…t.. p.adj sig
no_ibd vs Groupibd -0.015 0.033 332.974 -0.447 0.656 0.656
no_ibd vs ibd - CZ vs NO -0.036 0.042 240.107 -0.860 0.391 0.391
no_ibd vs Groupibd:CountryNO 0.035 0.044 261.852 0.797 0.426 0.426
knitr::kable(pc_pielou[[segment]],
             digits = 3,
             caption = "Results of linear model testing Pielou index")

Table: Results of linear model testing Pielou index

Plots

alpha_div_plots[[paste(segment,"Country")]]

3.5.0.2 Beta diversity

Main results

knitr::kable(pairwise_aitchison_raw[[paste("genus", segment)]],
             digits = 3,
             caption = "Results of PERMANOVA - robust aitchison distance")
Results of PERMANOVA - robust aitchison distance
pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
no_ibd vs ibd 1 170.678 1.097 0.003 1.000 1.000
no_ibd vs ibd , Country 1 396.369 2.547 0.008 0.169 0.169
no_ibd vs ibd : Country 1 133.144 0.855 0.003 0.999 0.999

PCA

pca_plots_list[[paste(segment,"genus custom")]]

3.5.0.3 Univariate analysis

Number of significant taxa

knitr::kable(as.data.frame(lapply(list_intersections,nrow)) %>% t() %>% 
  `colnames<-`("Count") %>% 
  `rownames<-`(c(names(list_intersections))),caption="Number of significant taxa")
Number of significant taxa
Count
colon genus no_ibd vs ibd 0

4 Paper-ready visualizations

Alpha diversity

p_A <- alpha_div_plots$`terminal_ileum Country` +
  ggtitle("Terminal ileum")+
  theme(plot.title = element_text(hjust=0.5,face = "bold",size = 15)) 

p_B <-  alpha_div_plots$`colon Country` +
  ggtitle("Colon") +
  theme(plot.title = element_text(hjust=0.5,face = "bold",size = 15)) 

Q3_alpha <- ggarrange(p_A,ggplot() + theme_minimal(),p_B,nrow=1, ncol=3,
                      widths = c(1,0.1,1))
Q3_alpha

Beta diversity

pca_ti <- pca_plots_list$`terminal_ileum genus custom` 
pca_colon <- pca_plots_list$`colon genus custom` 

genus_Q3_beta <- ggarrange(pca_ti,
                           ggplot() + theme_minimal(),
                           pca_colon,ncol=3,
                           widths = c(1,0.1,1))
genus_Q3_beta

save(Q3_alpha,genus_Q3_beta,file="../../../results/merged_sites/main_analysis/Q3/figures.RData")

Alpha + Beta diversity

alpha_beta <- ggarrange(Q3_alpha,genus_Q3_beta,
                        ncol = 1,nrow=2,labels = c("A","B"))
alpha_beta

4.1 Supplement Figure

alpha_beta <- ggarrange(alpha_beta,ggplot() + theme_minimal(),ncol=2,
                                widths = c(1,0.15))

alpha_beta

DAA

if (exists("dot_heatmap_ileum")){
 p_ileum <- dot_heatmap_ileum + 
  ggtitle("Terminal ileum") +
  theme(plot.title = element_text(hjust=0.5,face = "bold",size = 15),
        legend.position = "none") 
}

if (exists("dot_heatmap_colon")){
  p_colon <- dot_heatmap_colon  +
  ggtitle("Colon") +
  theme(plot.title = element_text(hjust=0.5,face = "bold",size = 15),
        legend.position = "none")
}

if (exists("p_ileum") | exists("p_colon")){
  heatmap_plot <- ggarrange(p_ileum,
                          p_colon,
                          ncol = 2,labels=c("A","B"))
  heatmap_plot
}

4.2 Supplement Figure - Akkermansia

volcano_figure <- ggarrange(
  volcano_linda_ileum + ggtitle("Terminal ileum"),
  volcano_linda_colon + ggtitle("Colon"),
  volcano_maaslin_ileum + ggtitle(""),
  volcano_maaslin_colon + ggtitle(""),
  labels = c("A","","B",""))

volcano_figure

5 Session info

sessionInfo()
R version 4.3.1 (2023-06-16 ucrt)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 11 x64 (build 26100)

Matrix products: default


locale:
[1] LC_COLLATE=Czech_Czechia.utf8  LC_CTYPE=Czech_Czechia.utf8   
[3] LC_MONETARY=Czech_Czechia.utf8 LC_NUMERIC=C                  
[5] LC_TIME=Czech_Czechia.utf8    

time zone: Europe/Prague
tzcode source: internal

attached base packages:
[1] parallel  stats     graphics  grDevices utils     datasets  methods  
[8] base     

other attached packages:
 [1] mice_3.17.0              picante_1.8.2            ape_5.8-1               
 [4] lubridate_1.9.3          forcats_1.0.0            stringr_1.5.1           
 [7] tidyverse_2.0.0          kableExtra_1.4.0         tidyr_1.3.1             
[10] gbm_2.2.2                doParallel_1.0.17        iterators_1.0.14        
[13] foreach_1.5.2            ranger_0.17.0            ggvenn_0.1.15           
[16] Maaslin2_1.16.0          purrr_1.0.2              pROC_1.18.5             
[19] glmnet_4.1-8             MicrobiomeStat_1.2       caret_6.0-94            
[22] openxlsx_4.2.8           magrittr_2.0.3           emmeans_1.11.2-8        
[25] lmerTest_3.1-3           robustlmm_3.3-1          lme4_1.1-37             
[28] Matrix_1.6-5             mgcv_1.9-1               nlme_3.1-164            
[31] pheatmap_1.0.13          reshape2_1.4.4           vegan_2.6-10            
[34] lattice_0.22-6           permute_0.9-8            ggplotify_0.1.2         
[37] ggrepel_0.9.6            ggpubr_0.6.1             MicrobiotaProcess_1.14.1
[40] phyloseq_1.46.0          ggplot2_3.5.2            tibble_3.2.1            
[43] dplyr_1.1.4              cowplot_1.2.0            readr_2.1.5             
[46] igraph_2.1.4             ccrepe_1.38.1            data.table_1.15.4       

loaded via a namespace (and not attached):
  [1] fs_1.6.5                    matrixStats_1.5.0          
  [3] bitops_1.0-9                RColorBrewer_1.1-3         
  [5] numDeriv_2016.8-1.1         tools_4.3.1                
  [7] backports_1.4.1             R6_2.6.1                   
  [9] lazyeval_0.2.2              jomo_2.7-6                 
 [11] rhdf5filters_1.14.1         withr_3.0.2                
 [13] gridExtra_2.3               cli_3.6.2                  
 [15] Biobase_2.62.0              logging_0.10-108           
 [17] biglm_0.9-3                 sandwich_3.1-1             
 [19] labeling_0.4.3              mvtnorm_1.3-3              
 [21] robustbase_0.99-4-1         pbapply_1.7-4              
 [23] systemfonts_1.2.2           yulab.utils_0.2.1          
 [25] svglite_2.1.3               parallelly_1.38.0          
 [27] rstudioapi_0.17.1           generics_0.1.4             
 [29] gridGraphics_0.5-1          shape_1.4.6.1              
 [31] car_3.1-3                   zip_2.3.2                  
 [33] biomformat_1.30.0           S4Vectors_0.40.2           
 [35] abind_1.4-8                 infotheo_1.2.0.1           
 [37] lifecycle_1.0.4             multcomp_1.4-28            
 [39] yaml_2.3.10                 carData_3.0-5              
 [41] SummarizedExperiment_1.32.0 Rtsne_0.17                 
 [43] rhdf5_2.46.1                recipes_1.3.1              
 [45] SparseArray_1.2.4           grid_4.3.1                 
 [47] mitml_0.4-5                 crayon_1.5.3               
 [49] pillar_1.11.0               knitr_1.50                 
 [51] optparse_1.7.5              GenomicRanges_1.54.1       
 [53] statip_0.2.3                boot_1.3-32                
 [55] estimability_1.5.1          future.apply_1.11.3        
 [57] codetools_0.2-20            pan_1.9                    
 [59] glue_1.7.0                  ggfun_0.2.0                
 [61] vctrs_0.6.5                 treeio_1.26.0              
 [63] Rdpack_2.6.4                gtable_0.3.6               
 [65] gower_1.0.1                 xfun_0.52                  
 [67] rbibutils_2.3               S4Arrays_1.2.1             
 [69] prodlim_2024.06.25          libcoin_1.0-10             
 [71] coda_0.19-4.1               reformulas_0.4.1           
 [73] pcaPP_2.0-5                 modeest_2.4.0              
 [75] survival_3.5-8              timeDate_4041.110          
 [77] hardhat_1.4.2               lava_1.8.1                 
 [79] statmod_1.5.0               TH.data_1.1-4              
 [81] ipred_0.9-15                ggtree_3.10.1              
 [83] GenomeInfoDb_1.38.8         fBasics_4041.97            
 [85] rpart_4.1.23                colorspace_2.1-1           
 [87] BiocGenerics_0.48.1         DBI_1.2.3                  
 [89] nnet_7.3-19                 ade4_1.7-23                
 [91] tidyselect_1.2.1            timeSeries_4041.111        
 [93] compiler_4.3.1              microbiome_1.24.0          
 [95] xml2_1.3.8                  DelayedArray_0.28.0        
 [97] scales_1.4.0                DEoptimR_1.1-4             
 [99] spatial_7.3-18              rappdirs_0.3.3             
[101] digest_0.6.35               minqa_1.2.8                
[103] rmarkdown_2.29              XVector_0.42.0             
[105] htmltools_0.5.8.1           pkgconfig_2.0.3            
[107] MatrixGenerics_1.14.0       stabledist_0.7-2           
[109] fastmap_1.2.0               rlang_1.1.3                
[111] htmlwidgets_1.6.4           farver_2.1.2               
[113] zoo_1.8-13                  jsonlite_2.0.0             
[115] ModelMetrics_1.2.2.2        RCurl_1.98-1.17            
[117] modeltools_0.2-24           Formula_1.2-5              
[119] GenomeInfoDbData_1.2.11     patchwork_1.3.2            
[121] Rhdf5lib_1.24.2             Rcpp_1.0.12                
[123] ggnewscale_0.5.2            fastGHQuad_1.0.1           
[125] stringi_1.8.3               ggstar_1.0.4               
[127] stable_1.1.6                zlibbioc_1.48.2            
[129] MASS_7.3-60.0.1             plyr_1.8.9                 
[131] listenv_0.9.1               Biostrings_2.70.3          
[133] splines_4.3.1               hash_2.2.6.3               
[135] multtest_2.58.0             hms_1.1.3                  
[137] ggtreeExtra_1.12.0          ggsignif_0.6.4             
[139] stats4_4.3.1                rmutil_1.1.10              
[141] evaluate_1.0.5              nloptr_2.2.1               
[143] tzdb_0.5.0                  getopt_1.20.4              
[145] future_1.34.0               clue_0.3-66                
[147] coin_1.4-3                  broom_1.0.9                
[149] xtable_1.8-4                tidytree_0.4.6             
[151] rstatix_0.7.2               viridisLite_0.4.2          
[153] class_7.3-22                aplot_0.2.8                
[155] IRanges_2.36.0              cluster_2.1.8.1            
[157] timechange_0.3.0            globals_0.18.0