source("custom_functions.R")Geography-independent mucosal microbiota alterations in primary sclerosing cholangitis persist after liver transplantation
3. Hypothesis: PSC disease is associated with IBD
1 Introduction
1.1 About
An IBD vs. no-IBD comparison was performed within PSC patients (pre-LTx and rPSC individuals combined) to discover IBD-specific pattern.
IBD vs no_IBD analysis on merged data
Alpha diversity –> group effect, country effect, interaction effect
Beta diversity – PERMANOVA, PCA -> group effect, country effect, interaction effect
DAA ->
o Group effect – linDA + MaAsLin2 intersection
o Country effect – – taxa with significant interaction effect were excluded based on individual post-hoc analysis. Taxa had to have the same direction in both countries
IBD vs no_IBD – finding IBD-specific pattern in PSC patients
Importing libraries and custom functions built for this analysis
1.2 Data Import
Importing ASV, taxa and metadata tables for both Czech and Norway samples.
Czech
MICROBIOME
path = "../../../../data/analysis_ready_data/ikem/"
asv_tab_ikem <- as.data.frame(fread(file.path(path,"asv_table_ikem.csv"),
check.names = FALSE))
taxa_tab_ikem <- as.data.frame(fread(file.path(path,"taxa_table_ikem.csv"),
check.names = FALSE))
metadata_ikem <- as.data.frame(fread(file.path(path,"metadata_ikem.csv"),
check.names = FALSE))IBD info
path = "../../../../data/clinical_data/"
cz <- read.csv(file.path(path,"clinical_metadata_cz.csv")) %>% dplyr::select(SampleID,PatientID,PSC_IBD,Group)
cz$PatientID <- as.character(cz$PatientID)
metadata_ikem <- merge(metadata_ikem,cz,by=c("SampleID", "Group"),all=TRUE)Norway
MICROBIOME
path = "../../../../data/analysis_ready_data/norway/"
asv_tab_norway <- as.data.frame(fread(file.path(path,"asv_table_norway.csv"),
check.names = FALSE))
taxa_tab_norway <- as.data.frame(fread(file.path(path,"taxa_table_norway.csv"),
check.names = FALSE))
metadata_norway <- as.data.frame(fread(file.path(path,"metadata_norway.csv"),
check.names = FALSE))IBD info
path = "../../../../data/clinical_data/"
no <- read.csv(file.path(path,"clinical_metadata_no.csv")) %>% dplyr::select(SampleID,subjectid,IBD, Group)1.2.1 Merging
Merging clinical metadata
clinical_metadata <- bind_rows(
cz %>% dplyr::mutate(PSC_IBD = case_when(
PSC_IBD == "0" ~ "no_ibd",
PSC_IBD == "1" ~ "ibd",
)) %>% `colnames<-`(c("SampleID","PatientID","IBD","Group")),
no %>% `colnames<-`(c("SampleID","PatientID","IBD","Group")) %>%
mutate(PatientID=paste0("NO_",PatientID)))Terminal ileum
ileum_data <- merging_data(asv_tab_1=asv_tab_ikem,
asv_tab_2=asv_tab_norway,
taxa_tab_1=taxa_tab_ikem,
taxa_tab_2=taxa_tab_norway,
metadata_1=metadata_ikem,
metadata_2=metadata_norway,
segment="TI",Q="Q3")Removing 1498 ASV(s)
Removing 1834 ASV(s)
Merging at ASV level
Finding inconsistencies in taxonomy, trying to keep the ones that have better taxonomy assignment
Removing 2296 ASV(s)
ileum_asv_tab <- ileum_data[[1]]
ileum_taxa_tab <- ileum_data[[2]]
ileum_metadata <- ileum_data[[3]]
ileum_metadata <- merge(ileum_metadata,
clinical_metadata %>%
dplyr::select(-Group),
by=c("SampleID"), all.x = TRUE) %>%
dplyr::select(-PatientID,Group) %>%
dplyr::mutate(Group=IBD) %>% dplyr::select(-IBD)Colon
colon_data <- merging_data(asv_tab_1=asv_tab_ikem,
asv_tab_2=asv_tab_norway,
taxa_tab_1=taxa_tab_ikem,
taxa_tab_2=taxa_tab_norway,
metadata_1=metadata_ikem,
metadata_2=metadata_norway,
segment="colon",Q="Q3")Removing 739 ASV(s)
Removing 266 ASV(s)
Merging at ASV level
Finding inconsistencies in taxonomy, trying to keep the ones that have better taxonomy assignment
Removing 2852 ASV(s)
colon_asv_tab <- colon_data[[1]]
colon_taxa_tab <- colon_data[[2]]
colon_metadata <- colon_data[[3]]
colon_metadata <- merge(colon_metadata,
clinical_metadata %>%
dplyr::select(-Group),
by=c("SampleID"), all.x = TRUE) %>%
dplyr::select(-PatientID,Group) %>%
dplyr::mutate(Group=IBD) %>% dplyr::select(-IBD)1.3 Import clinical data
1.3.1 CZ
# clinical metadata
metadata_clinical <- read.csv("../../../../data/clinical_data/clinical_metadata_cz.csv")
metadata_clinical$PatientID <- as.character(metadata_clinical$PatientID)
# DYSBIOSIS
metadata_dysbiosis <- read.csv("../../../../data/clinical_data/dysbiosis_metadata.csv") %>%
dplyr::filter(Country=="CZ")
list.files("../../../results/merged_sites/main_analysis/Q1/alpha_diversity/")[1] "alpha_diversity_results_colon.xlsx"
[2] "alpha_diversity_results_terminal_ileum.xlsx"
[3] "alpha_indices_colon.csv"
[4] "alpha_indices_terminal_ileum.csv"
# ALPHA DIVERSITY
metadata_alpha_ileum <- read.csv(
"../../../results/merged_sites/main_analysis/Q1/alpha_diversity/alpha_indices_terminal_ileum.csv") %>%
dplyr::filter(Country=="CZ")
metadata_alpha_colon <- read.csv(
"../../../results/merged_sites/main_analysis/Q1/alpha_diversity/alpha_indices_colon.csv") %>%
dplyr::filter(Country=="CZ")
metadata_alpha <- rbind(metadata_alpha_ileum,metadata_alpha_colon) %>%
mutate(PatientID=Patient) %>%
dplyr::select(-c(Patient, Group))
# MERGING
metadata_cz <- full_join(metadata_clinical, metadata_dysbiosis, by=c("SampleID","Matrix","PatientID","Group","Country"))
metadata_cz <- full_join(metadata_cz, metadata_alpha, by=c("SampleID","PatientID","Country"))
metadata_cz$Group <- factor(metadata_cz$Group,levels = c("healthy","pre_ltx","non-rPSC","rPSC"))1.3.2 NO
# clinical metadata
metadata_clinical <- read.csv("../../../../data/clinical_data/clinical_metadata_no.csv")
metadata_clinical %<>% mutate(PatientID=subjectid,
Matrix=segment) %>%
dplyr::select(-subjectid,-segment)
metadata_clinical$PatientID <- as.character(paste0("NO_",metadata_clinical$PatientID))
# DYSBIOSIS
metadata_dysbiosis <- read.csv("../../../../data/clinical_data/dysbiosis_metadata.csv") %>%
dplyr::filter(Country=="NO") %>%
dplyr::select(-c(Patient))
# ALPHA DIVERSITY
metadata_alpha_ileum <- read.csv(
"../../../results/merged_sites/main_analysis/Q1/alpha_diversity/alpha_indices_terminal_ileum.csv") %>%
dplyr::filter(Country=="NO")
metadata_alpha_colon <- read.csv(
"../../../results/merged_sites/main_analysis/Q1/alpha_diversity/alpha_indices_colon.csv") %>%
dplyr::filter(Country=="NO")
metadata_alpha <- rbind(metadata_alpha_ileum,metadata_alpha_colon) %>%
mutate(PatientID=Patient) %>%
dplyr::select(-c(Patient, Group))
# MERGING
metadata_no <- full_join(metadata_clinical, metadata_dysbiosis, by=c("SampleID","Matrix","PatientID","Group","Country"))
metadata_no <- full_join(metadata_no, metadata_alpha, by=c("SampleID","PatientID","Country"))
metadata_no$Group <- factor(metadata_no$Group,levels = c("healthy","pre_ltx","non-rPSC","rPSC"))1.3.3 Merging metadata
metadata_cz %<>% dplyr::mutate(Calprotectin=Fecal.calprotectin,
AOM=AOM_score,
APRI=APRI_score,
FIB4=FIB4_score) %>%
dplyr::select(SampleID,Matrix,PatientID,Group,Country,Bilirubin,ALP,Calprotectin,
MAYO_PSC,AOM,APRI,FIB4,Platelets,AST,Creatinine,
Albumin,ALT,PSC_IBD,GGT,INR,Albumin,Nancy_max,eMAYO,MAYO_dai,
dys_unfiltered_asv,dys_unfiltered_genus,
dys_filtered_asv,dys_filtered_genus,
Observe,Shannon,Simpson,Pielou)
metadata_no %<>% dplyr::mutate(Platelets=TRC,
Creatinine=Kreatinin,
MAYO_PSC=Mayo_score,
AST=ASAT/60,
ALT=ALAT/60,
PSC_IBD=IBD,
Bilirubin=Bilirubin*17.1,
ALP=ALP/60,
Albumin=Albumin*10) %>%
dplyr::mutate(PSC_IBD = case_when(
PSC_IBD == "no_ibd" ~ "0",
PSC_IBD == "ibd" ~ "1",
TRUE ~ Group # Keep other values as is
)) %>%
dplyr::select(SampleID,Matrix,PatientID,Group,Country,Bilirubin,ALP,Calprotectin,
MAYO_PSC,AOM,APRI,FIB4,Platelets,AST,Creatinine,
Albumin,ALT,PSC_IBD,
dys_unfiltered_asv,dys_unfiltered_genus,
dys_filtered_asv,dys_filtered_genus,
Observe,Shannon,Simpson,Pielou)
metadata_final <- merge(metadata_cz,metadata_no,all = TRUE)
metadata_final$Calprotectin[metadata_final$Calprotectin==">6000"] <- 6000
metadata_final$Calprotectin <- as.numeric(metadata_final$Calprotectin)1.4 CHI SQUARE TEST
1.4.1 pre_LTx, rPSC, non-rPSC
cz <- cz %>% dplyr::group_by(PSC_IBD,Group) %>%
distinct(PatientID, .keep_all = TRUE) %>%
count() %>% drop_na() %>%
dplyr::mutate(PSC_IBD = case_when(
PSC_IBD == "0" ~ "no_ibd",
PSC_IBD == "1" ~ "ibd",
)) %>% `colnames<-`(c("IBD","Group","n"))
no <- no %>% dplyr::group_by(IBD,Group) %>%
distinct(subjectid, .keep_all = TRUE) %>%
count() %>% drop_na()
# Summarize by IBD and Group
data <- bind_rows(cz, no) %>%
group_by(IBD, Group) %>%
summarise(n = sum(n), .groups = "drop") %>%
pivot_wider(names_from = Group, values_from = n) %>%
column_to_rownames("IBD") %>%
as.matrix()
chi_res <- chisq.test(data)
chi_res$expected non-rPSC pre_ltx rPSC
ibd 101.12774 96.11314 31.759124
no_ibd 19.87226 18.88686 6.240876
chi_res$observed non-rPSC pre_ltx rPSC
ibd 103 91 35
no_ibd 18 24 3
chi_res
Pearson's Chi-squared test
data: data
X-squared = 3.881, df = 2, p-value = 0.1436
1.4.2 pre_LTx + rPSC (PSC patients), non-rPSC
data2 <- data %>% as.data.frame() %>%
dplyr::mutate(PSC=pre_ltx+rPSC) %>%
dplyr::select(PSC,`non-rPSC`) %>% as.matrix()
chi_res <- chisq.test(data2)
chi_res$expected PSC non-rPSC
ibd 127.87226 101.12774
no_ibd 25.12774 19.87226
chi_res$observed PSC non-rPSC
ibd 126 103
no_ibd 27 18
chi_res
Pearson's Chi-squared test with Yates' continuity correction
data: data2
X-squared = 0.20305, df = 1, p-value = 0.6523
1.4.3 rPSC, non-rPSC
data3 <- data %>% as.data.frame() %>%
dplyr::select(rPSC,`non-rPSC`) %>% as.matrix()
chi_res <- chisq.test(data3)
chi_res$expected rPSC non-rPSC
ibd 32.981132 105.01887
no_ibd 5.018868 15.98113
chi_res$observed rPSC non-rPSC
ibd 35 103
no_ibd 3 18
chi_res
Pearson's Chi-squared test with Yates' continuity correction
data: data3
X-squared = 0.69593, df = 1, p-value = 0.4042
2 Data Analysis - Terminal ileum
segment="terminal_ileum"2.1 Filtering
Rules:
sequencing depth > 10000
nearZeroVar() with default settings
Rarefaction Curve
path="../../../intermediate_files/rarecurves"
seq_depth_threshold <- 10000ps <- construct_phyloseq(ileum_asv_tab,ileum_taxa_tab,ileum_metadata)
rareres <- get_rarecurve(obj=ps, chunks=500)
save(rareres,file = file.path(path,"rarefaction_ileum.Rdata"))load(file.path(path,"rarefaction_ileum.Rdata"))
prare <- ggrarecurve(obj=rareres,
factorNames="Country",
indexNames=c("Observe")) +
theme_bw() +
theme(axis.text=element_text(size=8), panel.grid=element_blank(),
strip.background = element_rect(colour=NA,fill="grey"),
strip.text.x = element_text(face="bold")) +
geom_vline(xintercept = seq_depth_threshold,
linetype="dashed",
color = "red") +
xlim(0, 20000)The color has been set automatically, you can reset it manually by adding scale_color_manual(values=yourcolors)
prareLibrary size
read_counts(ileum_asv_tab, line = c(5000,10000))Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
2.1.1 Sequencing depth
data_filt <- seq_depth_filtering(ileum_asv_tab,
ileum_taxa_tab,
ileum_metadata,
seq_depth_threshold = 10000)Removing 82 ASV(s)
filt_ileum_asv_tab <- data_filt[[1]]; alpha_ileum_asv_tab <- filt_ileum_asv_tab
filt_ileum_taxa_tab <- data_filt[[2]]; alpha_ileum_taxa_tab <- filt_ileum_taxa_tab
filt_ileum_metadata <- data_filt[[3]]; alpha_ileum_metadata <- filt_ileum_metadata
seq_step <- dim(filt_ileum_asv_tab)[1]Library size
read_counts(filt_ileum_asv_tab,line = c(5000,10000))Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
2.1.2 NearZeroVar
data_filt <- nearzerovar_filtering(filt_ileum_asv_tab,
filt_ileum_taxa_tab,
filt_ileum_metadata)
filt_ileum_asv_tab <- data_filt[[1]]
filt_ileum_taxa_tab <- data_filt[[2]]
nearzero_step <- dim(filt_ileum_asv_tab)[1]Library size
read_counts(filt_ileum_asv_tab,line = c(5000,10000))Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
2.1.3 Final Counts
final_counts_filtering(ileum_asv_tab,
filt_ileum_asv_tab,
filt_ileum_metadata,
seq_step, 0, nearzero_step) %>% `colnames<-`("Count") Count
Raw data: ASVs 2836
Raw data: Samples 108
Sequencing depth filt: ASVs 2754
Prevalence filt: ASVs 0
NearZeroVar filt: ASVs 994
Filt data: ASVs 994
Filt data: Samples 103
Filt data: Patients 103
Filt data: Patients.1 0
Filtered samples 5
Matrices TI
healthy 0
non-rPSC 0
rPSC 0
pre_ltx 0
post_ltx 0
ETOH 0
2.2 Alpha diversity
path = "../../../results/merged_sites/main_analysis/Q3/alpha_diversity"Calculation
# Construct MPSE object
alpha_ileum_metadata$Sample <- alpha_ileum_metadata$SampleID
ileum_mpse <- as.MPSE(construct_phyloseq(alpha_ileum_asv_tab,
alpha_ileum_taxa_tab,
alpha_ileum_metadata))
ileum_mpse %<>% mp_rrarefy(raresize = 10000,seed = 123)
# Calculate alpha diversity - rarefied counts
ileum_mpse %<>% mp_cal_alpha(.abundance=RareAbundance, force=TRUE)alpha_div_plots <- list()
# preparing data frame
alpha_data <- data.frame(SampleID=ileum_mpse$Sample.x,
Observe=ileum_mpse$Observe,
Shannon=ileum_mpse$Shannon,
Simpson=ileum_mpse$Simpson,
Pielou=ileum_mpse$Pielou,
Group=ileum_mpse$Group,
Country=ileum_mpse$Country,
Patient=ileum_mpse$Patient)
write.csv(alpha_data,file.path(path,paste0("alpha_indices_",segment,".csv")),
row.names = FALSE)alpha_data <- read.csv(file.path(path,paste0("alpha_indices_",segment,".csv")))2.2.1 Plots
p_boxplot_alpha <- alpha_diversity_countries(alpha_data,
show_legend = FALSE,
size=1,gap=FALSE)Using SampleID, Group, Country, Patient as id variables
Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
ℹ Please use the `linewidth` argument instead.
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
Using SampleID, Group, Country, Patient as id variables
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
# save the results
alpha_div_plots[[paste(segment,"Country")]] <- p_boxplot_alpha
# see the results
p_boxplot_alpha2.2.2 Linear Model
path = "../../../results/merged_sites/main_analysis/Q3/alpha_diversity"
alpha_data <- read.csv(file.path(path,paste0("alpha_indices_",segment,".csv")))Richness
results_model <- pairwise.lm(formula = "Observe ~ Group * Country",
factors=alpha_data$Group,
data=alpha_data)
# check interaction
if (!is.data.frame(results_model)){
results_model_observe <- results_model[[1]]
results_model_observe_emeans <- results_model[[2]]
} else {
results_model_observe <- results_model
results_model_observe_emeans <- NA
}
# save the results
pc_observed <- list();
pc_observed[[segment]] <- results_model_observe# see the results
knitr::kable(results_model_observe,digits = 3,
caption = "Raw results of linear model of richness estimation.")| Estimate | Std. Error | t value | Pr(>|t|) | |
|---|---|---|---|---|
| no_ibd vs Groupibd | -41.186 | 24.041 | -1.713 | 0.090 |
| no_ibd , ibd - CZ vs NO | -25.636 | 27.443 | -0.934 | 0.352 |
| no_ibd vs Groupibd:CountryNO | 34.572 | 29.532 | 1.171 | 0.245 |
knitr::kable(results_model_observe_emeans,digits = 3,
caption = "Raw results of independent country analysis")Table: Raw results of independent country analysis
Shannon
results_model <- pairwise.lm(formula = "Shannon ~ Group * Country",
factors=alpha_data$Group,
data=alpha_data)
# check interaction
if (!is.data.frame(results_model)){
results_model_shannon <- results_model[[1]]
results_model_shannon_emeans <- results_model[[2]]
} else {
results_model_shannon <- results_model
results_model_shannon_emeans <- NA
}
# save the results
pc_shannon <- list();
pc_shannon[[segment]] <- as.data.frame(results_model_shannon)# see the results
knitr::kable(results_model_shannon,digits = 3,
caption = "Raw results of linear model of Shannon estimation.")| Estimate | Std. Error | t value | Pr(>|t|) | |
|---|---|---|---|---|
| no_ibd vs Groupibd | -0.438 | 0.325 | -1.349 | 0.180 |
| no_ibd , ibd - CZ vs NO | -0.453 | 0.371 | -1.221 | 0.225 |
| no_ibd vs Groupibd:CountryNO | 0.471 | 0.399 | 1.180 | 0.241 |
knitr::kable(results_model_shannon_emeans,digits = 3,
caption = "Raw results of independent country analysis")Table: Raw results of independent country analysis
Simpson
results_model <- pairwise.lm(formula = "Simpson ~ Group * Country",
factors=alpha_data$Group,
data=alpha_data)
# check interaction
if (!is.data.frame(results_model)){
results_model_simpson <- results_model[[1]]
results_model_simpson_emeans <- results_model[[2]]
} else {
results_model_simpson <- results_model
results_model_simpson_emeans <- NA
}
# save the results
pc_simpson <- list();
pc_simpson[[segment]] <- as.data.frame(results_model_simpson)# see the results
knitr::kable(results_model_simpson,digits = 3,
caption = "Raw results of linear model of Simpson estimation.")| Estimate | Std. Error | t value | Pr(>|t|) | |
|---|---|---|---|---|
| no_ibd vs Groupibd | -0.043 | 0.043 | -1.004 | 0.318 |
| no_ibd , ibd - CZ vs NO | -0.047 | 0.049 | -0.974 | 0.332 |
| no_ibd vs Groupibd:CountryNO | 0.045 | 0.052 | 0.865 | 0.389 |
knitr::kable(results_model_simpson_emeans,digits = 3,
caption = "Raw results of independent country analysis")Table: Raw results of independent country analysis
Pielou
results_model <- pairwise.lm(formula = "Pielou ~ Group * Country",
factors=alpha_data$Group,
data=alpha_data)
# check interaction
if (!is.data.frame(results_model)){
results_model_pielou <- results_model[[1]]
results_model_pielou_emeans <- results_model[[2]]
} else {
results_model_pielou <- results_model
results_model_pielou_emeans <- NA
}
# save the results
pc_pielou <- list();
pc_pielou[[segment]] <- as.data.frame(results_model_pielou)# see the results
knitr::kable(results_model_pielou,digits = 3,
caption = "Raw results of linear model of Pielou estimation.")| Estimate | Std. Error | t value | Pr(>|t|) | |
|---|---|---|---|---|
| no_ibd vs Groupibd | -0.042 | 0.046 | -0.910 | 0.365 |
| no_ibd , ibd - CZ vs NO | -0.068 | 0.052 | -1.305 | 0.195 |
| no_ibd vs Groupibd:CountryNO | 0.056 | 0.056 | 1.007 | 0.317 |
knitr::kable(results_model_pielou_emeans,digits = 3,
caption = "Raw results of independent country analysis")Table: Raw results of independent country analysis
2.2.3 Saving results
alpha_list <- list(
Richness=pc_observed[[segment]],
Shannon=pc_shannon[[segment]])
write.xlsx(alpha_list,
file = file.path(path,paste0("alpha_diversity_results_",segment,".xlsx")))2.3 Beta diversity
Calculating Aitchison distance (euclidean distance on clr-transformed data), both at ASV and genus level.
2.3.1 Main analysis
Genus level, Aitchison distance
level="genus"path = "../../../results/merged_sites/main_analysis/Q3/beta_diversity"pairwise_aitchison_raw <- list()
pca_plots_list <- list()Aggregation, filtering
# Aggregation
genus_data <- aggregate_taxa(ileum_asv_tab,
ileum_taxa_tab,
taxonomic_level=level,
names=TRUE)
# Filtration
filt_data <- filtering_steps(genus_data[[1]],
genus_data[[2]],
ileum_metadata,
seq_depth_threshold=10000)Removing 6 ASV(s)
filt_ileum_genus_tab <- filt_data[[1]]
filt_ileum_genus_taxa <- filt_data[[2]]
filt_ileum_metadata <- filt_data[[3]]2.3.1.0.1 PERMANOVA
pairwise_df <- filt_ileum_genus_tab %>% column_to_rownames("SeqID") %>% t()
# main effect
pp_main <- pairwise.adonis(pairwise_df,
filt_ileum_metadata$Group,
covariate = filt_ileum_metadata$Country, sim.method = "robust.aitchison", p.adjust.m="BH")
# interaction
pp_int <- pairwise.adonis(pairwise_df,filt_ileum_metadata$Group,covariate = filt_ileum_metadata$Country, interaction = TRUE, sim.method = "robust.aitchison", p.adjust.m="BH")
# tidy the results
pp_factor <- pp_main[[1]]
pp_cov <- pp_main[[2]]
pp_fac.cov <- pp_int[[3]]
cols <- c("pairs","Df","SumsOfSqs", "F.Model","R2","p.value", "p.adjusted", "sig")
colnames(pp_factor) <- cols; colnames(pp_cov) <- cols; colnames(pp_fac.cov) <- cols;
# save raw results
pairwise_aitchison_raw[[paste(level, segment)]] <- rbind(pp_factor,pp_cov,pp_fac.cov)# see the results
knitr::kable(pp_factor,digits = 3,caption = "PERMANOVA, GROUP separation")| pairs | Df | SumsOfSqs | F.Model | R2 | p.value | p.adjusted | sig |
|---|---|---|---|---|---|---|---|
| no_ibd vs ibd | 1 | 204.897 | 1.148 | 0.011 | 0.175 | 0.175 |
knitr::kable(pp_cov,digits = 3,caption = "PERMANOVA, COUNTRY separation")| pairs | Df | SumsOfSqs | F.Model | R2 | p.value | p.adjusted | sig |
|---|---|---|---|---|---|---|---|
| no_ibd vs ibd , Country | 1 | 562.985 | 3.155 | 0.03 | 0.001 | 0.001 | *** |
knitr::kable(pp_fac.cov,digits = 3,caption = "PERMANOVA, INTERACTION GROUP:Country")| pairs | Df | SumsOfSqs | F.Model | R2 | p.value | p.adjusted | sig |
|---|---|---|---|---|---|---|---|
| no_ibd vs ibd : Country | 1 | 168.942 | 0.946 | 0.009 | 0.564 | 0.564 |
Interaction check
interaction_sig <- pp_fac.cov$pairs[pp_fac.cov$p.adjusted < 0.1]
if (length(interaction_sig)>0){
for (i in 1:length(interaction_sig)){
group1 <- unlist(strsplit(interaction_sig[i],split = " vs "))[1]
group2 <- unlist(strsplit(interaction_sig[i],split = " vs "))[2]
group2 <- unlist(strsplit(group2,split = " : "))[1]
result_list <- adonis_postanalysis(x=pairwise_df,
factors = filt_ileum_metadata$Group,
covariate = filt_ileum_metadata$Country,
group1 = group1,
group2 = group2)
print(result_list)
}
}2.3.1.0.2 Plots
PCA
p <- pca_plot_custom(filt_ileum_genus_tab,
filt_ileum_genus_taxa,
filt_ileum_metadata,
show_boxplots = TRUE,
variable = "Group", size=3, show_legend=FALSE)
# save the results
pca_plots_list[[paste(segment,level,"custom")]] <- p
# see the results
p2.3.1.1 Saving results
write.xlsx(pairwise_aitchison_raw[[paste(level, segment)]],
file = file.path(path,
paste0("beta_diversity_results_", segment,".xlsx")))2.4 Univariate Analysis
2.4.1 Main analysis
level="genus"# needed paths
path = "../../../results/merged_sites/main_analysis/Q3/univariate_analysis"
path_maaslin=file.path("../../../intermediate_files/maaslin/Q3",level)# variables
raw_linda_results_genus <- list();
raw_linda_results_genus[[segment]] <- list()
linda_results_genus <- list();
linda_results_genus[[segment]] <- list()
# country and interaction problems
list_intersections <- list()
list_venns <- list()
uni_statistics <- list()
# workbook for final df
wb <- createWorkbook()
# PSC effect
psc_effect <- list()Aggregate taxa
genus_data <- aggregate_taxa(ileum_asv_tab,
ileum_taxa_tab,
taxonomic_level = level)
ileum_genus_tab <- genus_data[[1]]
ileum_genus_taxa_tab <- genus_data[[2]]
ileum_genus_asv_taxa_tab <- create_asv_taxa_table(ileum_genus_tab,
ileum_genus_taxa_tab)2.4.1.1 ibd vs no_ibd
group <- c("no_ibd","ibd")
comparison_name <- paste0(group[1], " vs ",group[2])2.4.1.1.1 linDA
# prepare the data
linda_data <- binomial_prep(ileum_genus_tab,
ileum_genus_taxa_tab,
ileum_metadata,
group, usage="linDA")Removing 6 ASV(s)
filt_ileum_uni_data <- linda_data[[1]]
filt_ileum_uni_taxa <- linda_data[[2]]
filt_ileum_uni_metadata <- linda_data[[3]]
# fit the model
linda.obj <- linda(filt_ileum_uni_data,
filt_ileum_uni_metadata,
formula = '~ Group * Country')0 features are filtered!
The filtered data has 103 samples and 236 features will be tested!
Warning in linda(filt_ileum_uni_data, filt_ileum_uni_metadata, formula = "~ Group * Country"): Some features have less than 3 nonzero values!
They have virtually no statistical power. You may consider filtering them in the analysis!
Imputation approach is used.
Fit linear models ...
Completed.
linda.output <- linda.obj$output
linda.output <- linda_renaming(linda.output, group)
# save the results
group1 <- paste0(group[1], " vs ","Group",group[2])
group2 <- paste0(group[1], " , ",group[2], " - CZ vs NO")
group3 <- paste0(group[1], " vs ","Group",group[2], ":CountryNO")
for (grp in c(group1,group2,group3)){
raw_linda_results_genus[[segment]][[grp]] <-
rawlinda.df(linda.output,
grp,
filt_ileum_uni_data,
filt_ileum_uni_taxa)
linda_results_genus[[segment]][[grp]] <-
linda.df(linda.output,
grp,
filt_ileum_uni_data,
filt_ileum_uni_taxa)
}# volcano plot
volcano_1 <- volcano_plot_linda(linda.output,
group1,
taxa_table = filt_ileum_uni_taxa) +
ggtitle(comparison_title)
volcano_2 <- volcano_plot_linda(linda.output, group2,
taxa_table = filt_ileum_uni_taxa) +
ggtitle("NO vs CZ")
volcano_3 <- volcano_plot_linda(linda.output, group3,
taxa_table = filt_ileum_uni_taxa) +
ggtitle("Interaction effect")
volcano <- ggarrange(volcano_1,volcano_2,volcano_3, ncol=3)
volcanovolcano_linda_ileum <- volcano_1 +
ggtitle("IBD vs no_IBD") +
xlab("Effect size (LinDA)") +
ylab("–log"[10]~"(q-value)")
volcano_linda_ileum2.4.1.1.2 MaAsLin2
volcano1 <- volcano_plot_maaslin(fit_data,filt_ileum_uni_taxa) +
ggtitle(comparison_title)
volcano2 <- volcano_plot_maaslin(fit_data,filt_ileum_uni_taxa,variable="Country") +
ggtitle("Country effect")
volcano <- ggarrange(volcano1,volcano2, ncol=2)
volcanovolcano_maaslin_ileum <- volcano1 +
ggtitle("IBD vs no_IBD") +
xlab("Effect size (MaAsLin2)") +
ylab("–log"[10]~"(q-value)")
volcano_maaslin_ileum2.4.1.1.3 Group - Intersection
intersection_results <- group_intersection(group, list_intersections, list_venns,
linda.output, fit_data,
raw_linda_results_genus,
segment = segment,
level=level)
list_intersections <- intersection_results[[1]]
list_venns <- intersection_results[[2]]
venn <- intersection_results[[3]]
# show the results
venn2.4.1.1.4 Interaction effect
list_interaction_significant <- country_interaction(group,
linda.output,
list_intersections,
filt_ileum_uni_data,
filt_ileum_uni_metadata,
segment=segment,
level=level)
# see the result
## significant interaction effect
list_interaction_significant[[1]]NULL
## results for czech cohort
list_interaction_significant[[2]][1] NA
## results for norwegian cohort
list_interaction_significant[[3]][1] NA
Removing problematic taxa
list_intersections <- removing_interaction_problems(group,
list_interaction_significant,
list_intersections,
segment=segment,
level=level)2.4.1.1.5 Basic statistics
uni_df <- merge(basic_univariate_statistics(linda_data,group),
raw_linda_results_genus[[segment]][[group1]],
by="SeqID",all=TRUE)[1] "no_ibd"
[1] "ibd"
[1] "no_ibd"
[1] "ibd"
uni_df[["final_sig"]] <- uni_df$SeqID %in% list_intersections[[paste(segment,level,comparison_name)]][["SeqID"]]
uni_statistics[[segment]][[paste(level,comparison_name)]] <- uni_df
# for comparison
new_name <- comparison_name
addWorksheet(wb, sheetName = new_name)
writeData(wb, sheet = new_name, uni_df, rowNames=FALSE)2.4.1.2 Visualization
Heatmap visualizing the linDA’s logFoldChange for taxa with p < 0.1.
list_heatmap <- list_intersections[grep(paste(segment,level),
names(list_intersections),value=TRUE)]
if (length((list_heatmap[[1]][[1]]))>1){
p_heatmap_linda <- heatmap_linda(list_heatmap,ileum_taxa_tab)
p_heatmap_linda
}Dotheatmap
if (length((list_heatmap[[1]][[1]]))>0){
dotheatmap_linda <- dot_heatmap_linda(list_heatmap,
uni_statistics$terminal_ileum[grepl(level,names(uni_statistics$terminal_ileum))],
ileum_taxa_tab)
dotheatmap_linda
}Horizontal bar plot
if (length((list_heatmap[[1]][[1]]))>0){
p_prevalence <- horizontal_barplot(wb,taxa=levels(dotheatmap_linda$data$SeqID))
p_prevalence_final <- ggarrange(p_prevalence,
ggplot() + theme_minimal(),
nrow = 2,heights = c(1,0.085))
p <- ggarrange(dotheatmap_linda + theme(legend.position = "none"),p_prevalence_final,ncol=2,widths = c(1,0.2))
p
}2.4.1.3 Saving results
# ALL DATA
saveWorkbook(wb,file.path(path,paste0("uni_analysis_wb_",segment,".xlsx")),
overwrite = TRUE)
# PSC effect
write.xlsx(psc_effect[[paste(segment,level)]],file.path(path,paste0("psc_effect_",segment,".xlsx")))
# SIGNIFICANT taxa
write.xlsx(list_intersections[grepl(segment,names(list_intersections))] %>%
`names<-`(gsub(segment, "", names(
list_intersections[grepl(segment,names(list_intersections))]))),
file.path(path,paste0("significant_taxa_",segment,".xlsx")))2.5 Results overview
2.5.0.1 Alpha diversity
knitr::kable(pc_observed[[segment]],
digits = 3,
caption = "Results of linear model testing ASV Richness")| Estimate | Std. Error | t value | Pr(>|t|) | |
|---|---|---|---|---|
| no_ibd vs Groupibd | -41.186 | 24.041 | -1.713 | 0.090 |
| no_ibd , ibd - CZ vs NO | -25.636 | 27.443 | -0.934 | 0.352 |
| no_ibd vs Groupibd:CountryNO | 34.572 | 29.532 | 1.171 | 0.245 |
knitr::kable(pc_shannon[[segment]],
digits = 3,
caption = "Results of linear model testing Shannon index")| Estimate | Std. Error | t value | Pr(>|t|) | |
|---|---|---|---|---|
| no_ibd vs Groupibd | -0.438 | 0.325 | -1.349 | 0.180 |
| no_ibd , ibd - CZ vs NO | -0.453 | 0.371 | -1.221 | 0.225 |
| no_ibd vs Groupibd:CountryNO | 0.471 | 0.399 | 1.180 | 0.241 |
knitr::kable(pc_simpson[[segment]],
digits = 3,
caption = "Results of linear model testing Simpson index")| Estimate | Std. Error | t value | Pr(>|t|) | |
|---|---|---|---|---|
| no_ibd vs Groupibd | -0.043 | 0.043 | -1.004 | 0.318 |
| no_ibd , ibd - CZ vs NO | -0.047 | 0.049 | -0.974 | 0.332 |
| no_ibd vs Groupibd:CountryNO | 0.045 | 0.052 | 0.865 | 0.389 |
knitr::kable(pc_pielou[[segment]],
digits = 3,
caption = "Results of linear model testing Pielou index")| Estimate | Std. Error | t value | Pr(>|t|) | |
|---|---|---|---|---|
| no_ibd vs Groupibd | -0.042 | 0.046 | -0.910 | 0.365 |
| no_ibd , ibd - CZ vs NO | -0.068 | 0.052 | -1.305 | 0.195 |
| no_ibd vs Groupibd:CountryNO | 0.056 | 0.056 | 1.007 | 0.317 |
Plots
alpha_div_plots[[paste(segment,"Country")]]2.5.0.2 Beta diversity
Main results
knitr::kable(pairwise_aitchison_raw[[paste("genus", segment)]],
digits = 3,
caption = "Results of PERMANOVA - robust aitchison distance")| pairs | Df | SumsOfSqs | F.Model | R2 | p.value | p.adjusted | sig |
|---|---|---|---|---|---|---|---|
| no_ibd vs ibd | 1 | 204.897 | 1.148 | 0.011 | 0.175 | 0.175 | |
| no_ibd vs ibd , Country | 1 | 562.985 | 3.155 | 0.030 | 0.001 | 0.001 | *** |
| no_ibd vs ibd : Country | 1 | 168.942 | 0.946 | 0.009 | 0.564 | 0.564 |
PCA
pca_plots_list[[paste(segment,"genus custom")]]2.5.0.3 Univariate analysis
Number of significant taxa
knitr::kable(as.data.frame(lapply(list_intersections,nrow)) %>% t() %>%
`colnames<-`("Count") %>%
`rownames<-`(c(names(list_intersections))),caption="Number of significant taxa")| Count | |
|---|---|
| terminal_ileum genus no_ibd vs ibd | 0 |
3 Data Analysis - Colon
segment="colon"3.1 Filtering
Rules: - prevalence > 5% (per group) - nearZeroVar with default settings - sequencing depth > 5000 - taxonomic assignment at least order
Library size
read_counts(colon_asv_tab, line = c(5000,10000))Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
3.1.1 Sequencing depth
data_filt <- seq_depth_filtering(colon_asv_tab,
colon_taxa_tab,
colon_metadata,
seq_depth_threshold = 10000)Removing 43 ASV(s)
filt_colon_asv_tab <- data_filt[[1]]; alpha_colon_asv_tab <- filt_colon_asv_tab
filt_colon_taxa_tab <- data_filt[[2]]; alpha_colon_taxa_tab <- filt_colon_taxa_tab
filt_colon_metadata <- data_filt[[3]]; alpha_colon_metadata <- filt_colon_metadata
seq_step <- dim(filt_colon_asv_tab)[1]Library size
read_counts(filt_colon_asv_tab,line = c(10000))Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
3.1.2 NearZeroVar
data_filt <- nearzerovar_filtering(filt_colon_asv_tab,
filt_colon_taxa_tab,
filt_colon_metadata)
filt_colon_asv_tab <- data_filt[[1]]
filt_colon_taxa_tab <- data_filt[[2]]
nearzero_step <- dim(filt_colon_asv_tab)[1]Library size
read_counts(filt_colon_asv_tab,line = c(5000,10000))Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
Check zero depth
data_filt <- check_zero_depth(filt_colon_asv_tab,
filt_colon_taxa_tab,
filt_colon_metadata)
filt_colon_asv_tab <- data_filt[[1]];
filt_colon_taxa_tab <- data_filt[[2]];
filt_colon_metadata <- data_filt[[3]]; Library size
read_counts(filt_colon_asv_tab,line = c(5000,10000))Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
3.1.3 Final Counts
final_counts_filtering(colon_asv_tab,
filt_colon_asv_tab,
filt_colon_metadata,
seq_step, 0, nearzero_step) V1
Raw data: ASVs 4084
Raw data: Samples 350
Sequencing depth filt: ASVs 4041
Prevalence filt: ASVs 0
NearZeroVar filt: ASVs 354
Filt data: ASVs 354
Filt data: Samples 337
Filt data: Patients 143
Filt data: Patients.1 0
Filtered samples 13
Matrices CA;CD;SI;Cecum;Rectum
healthy 0
non-rPSC 0
rPSC 0
pre_ltx 0
post_ltx 0
ETOH 0
3.2 Alpha diversity
path = "../../../results/merged_sites/main_analysis/Q3/alpha_diversity"Calculation
# Construct MPSE object
alpha_colon_metadata$Sample <- alpha_colon_metadata$SampleID
colon_mpse <- as.MPSE(construct_phyloseq(alpha_colon_asv_tab,
alpha_colon_taxa_tab,
alpha_colon_metadata))
colon_mpse %<>% mp_rrarefy(raresize = 10000,seed = 123)
# Calculate alpha diversity - rarefied counts
colon_mpse %<>% mp_cal_alpha(.abundance=RareAbundance, force=TRUE)alpha_data <- data.frame(SampleID=colon_mpse$Sample.x,
Observe=colon_mpse$Observe,
Shannon=colon_mpse$Shannon,
Simpson=colon_mpse$Simpson,
Pielou=colon_mpse$Pielou,
Group=colon_mpse$Group,
Country=colon_mpse$Country,
Patient=colon_mpse$Patient)
write.csv(alpha_data,file.path(path,paste0("alpha_indices_",segment,".csv")),
row.names = FALSE)3.2.1 Plots
p_boxplot_alpha <- alpha_diversity_countries(alpha_data,
show_legend = FALSE,
size=1,gap=FALSE)Using SampleID, Group, Country, Patient as id variables
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
Using SampleID, Group, Country, Patient as id variables
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
# save the results
alpha_div_plots[[paste(segment,"Country")]] <- p_boxplot_alpha
# see the results
p_boxplot_alpha3.2.2 Linear Model
path = "../../../results/merged_sites/main_analysis/Q3/alpha_diversity"
alpha_data <- read.csv(file.path(path,paste0("alpha_indices_",segment,".csv")))Richness
results_model <- pairwise.lmer(
formula = "Observe ~ Group * Country + (1|Patient)",
factors=alpha_data$Group,
data=alpha_data)
# check interaction
if (!is.data.frame(results_model)){
results_model_observe <- results_model[[1]]
results_model_observe_detailed <- results_model[[2]]
} else {
results_model_observe <- results_model
results_model_observe_detailed <- NA
}
# save the results
pc_observed[[segment]] <- results_model_observe# see the results
knitr::kable(results_model_observe,digits = 3,
caption = "Raw results of linear model of richness estimation.")| Estimate | Std..Error | df | t.value | Pr…t.. | p.adj | sig | |
|---|---|---|---|---|---|---|---|
| no_ibd vs Groupibd | 0.195 | 13.828 | 330.245 | 0.014 | 0.989 | 0.989 | |
| no_ibd vs ibd - CZ vs NO | -0.556 | 18.199 | 238.733 | -0.031 | 0.976 | 0.976 | |
| no_ibd vs Groupibd:CountryNO | -1.880 | 18.971 | 263.174 | -0.099 | 0.921 | 0.921 |
knitr::kable(results_model_observe_detailed,digits = 3,
caption = "Raw results of independent country analysis")Table: Raw results of independent country analysis
Shannon
results_model <- pairwise.lmer(
formula = "Shannon ~ Group * Country + (1|Patient)",
factors=alpha_data$Group,
data=alpha_data)
# check interaction
if (!is.data.frame(results_model)){
results_model_shannon <- results_model[[1]]
results_model_shannon_detailed <- results_model[[2]]
} else {
results_model_shannon <- results_model
results_model_shannon_detailed <- NA
}
# save the results
pc_shannon[[segment]] <- as.data.frame(results_model_shannon)# see the results
knitr::kable(results_model_shannon,digits = 3,
caption = "Raw results of linear model of Shannon estimation.")| Estimate | Std..Error | df | t.value | Pr…t.. | p.adj | sig | |
|---|---|---|---|---|---|---|---|
| no_ibd vs Groupibd | -0.038 | 0.204 | 332.621 | -0.185 | 0.854 | 0.854 | |
| no_ibd vs ibd - CZ vs NO | -0.139 | 0.262 | 239.592 | -0.529 | 0.597 | 0.597 | |
| no_ibd vs Groupibd:CountryNO | 0.101 | 0.275 | 262.547 | 0.367 | 0.714 | 0.714 |
knitr::kable(results_model_shannon_detailed,digits = 3,
caption = "Raw results of independent country analysis")Table: Raw results of independent country analysis
Simpson
results_model <- pairwise.lmer(
formula = "Simpson ~ Group * Country + (1|Patient)",
factors=alpha_data$Group,
data=alpha_data)
# check interaction
if (!is.data.frame(results_model)){
results_model_simpson <- results_model[[1]]
results_model_simpson_detailed <- results_model[[2]]
} else {
results_model_simpson <- results_model
results_model_simpson_detailed <- NA
}
# save the results
pc_simpson[[segment]] <- as.data.frame(results_model_simpson)# see the results
knitr::kable(results_model_simpson,digits = 3,
caption = "Raw results of linear model of Simpson estimation.")| Estimate | Std..Error | df | t.value | Pr…t.. | p.adj | sig | |
|---|---|---|---|---|---|---|---|
| no_ibd vs Groupibd | -0.015 | 0.033 | 332.974 | -0.447 | 0.656 | 0.656 | |
| no_ibd vs ibd - CZ vs NO | -0.036 | 0.042 | 240.107 | -0.860 | 0.391 | 0.391 | |
| no_ibd vs Groupibd:CountryNO | 0.035 | 0.044 | 261.852 | 0.797 | 0.426 | 0.426 |
knitr::kable(results_model_simpson_detailed,digits = 3,
caption = "Raw results of independent country analysis")Table: Raw results of independent country analysis
3.2.3 Saving results
alpha_list <- list(
Richness=pc_observed[[segment]] %>% rownames_to_column("Comparison"),
Shannon=pc_shannon[[segment]] %>% rownames_to_column("Comparison"),
Simpson=pc_simpson[[segment]] %>% rownames_to_column("Comparison"))
write.xlsx(alpha_list,
file = file.path(path,paste0("alpha_diversity_results_",segment,".xlsx")))3.3 Beta diversity
Calculating Aitchison distance (euclidean distance on clr-transformed data), both at ASV and genus level.
3.3.1 Main analysis - Genus, Aitchison
Genus level, Aitchison distance
level="genus"path = "../../../results/merged_sites/main_analysis/Q3/beta_diversity"Aggregation, filtering
genus_data <- aggregate_taxa(colon_asv_tab,
colon_taxa_tab,
taxonomic_level=level,
names=TRUE)
filt_data <- filtering_steps(genus_data[[1]],
genus_data[[2]],
colon_metadata,
seq_depth_threshold=10000)Removing 7 ASV(s)
filt_colon_genus_tab <- filt_data[[1]]
filt_colon_genus_taxa <- filt_data[[2]]
filt_colon_genus_metadata <- filt_data[[3]]3.3.1.0.1 PERMANOVA
pairwise_df <- filt_colon_genus_tab %>% column_to_rownames("SeqID") %>% t()
# main effect
pp_main <- pairwise.adonis(pairwise_df,filt_colon_genus_metadata$Group,
covariate = filt_colon_genus_metadata$Country,
patients = filt_colon_genus_metadata$Patient,
sim.method = "robust.aitchison", p.adjust.m="BH")
# interaction
pp_int <- pairwise.adonis(pairwise_df,filt_colon_genus_metadata$Group,
covariate = filt_colon_genus_metadata$Country,
interaction = TRUE,
patients = filt_colon_genus_metadata$Patient,
sim.method = "robust.aitchison", p.adjust.m="BH")
# tidy the results
pp_factor <- pp_main[[1]]
pp_cov <- pp_main[[2]]
pp_fac.cov <- pp_int[[3]]
cols <- c("pairs","Df","SumsOfSqs", "F.Model","R2","p.value", "p.adjusted", "sig")
colnames(pp_factor) <- cols; colnames(pp_cov) <- cols; colnames(pp_fac.cov) <- cols;
# save raw results
pairwise_aitchison_raw[[paste(level, segment)]] <-rbind(pp_factor,pp_cov,pp_fac.cov)# see the results
knitr::kable(pp_factor,digits = 3,caption = "PERMANOVA, GROUP separation")| pairs | Df | SumsOfSqs | F.Model | R2 | p.value | p.adjusted | sig |
|---|---|---|---|---|---|---|---|
| no_ibd vs ibd | 1 | 170.678 | 1.097 | 0.003 | 1 | 1 |
knitr::kable(pp_cov,digits = 3,caption = "PERMANOVA, COUNTRY separation")| pairs | Df | SumsOfSqs | F.Model | R2 | p.value | p.adjusted | sig |
|---|---|---|---|---|---|---|---|
| no_ibd vs ibd , Country | 1 | 396.369 | 2.547 | 0.008 | 0.169 | 0.169 |
knitr::kable(pp_fac.cov,digits = 3,caption = "PERMANOVA, INTERACTION GROUP:Country")| pairs | Df | SumsOfSqs | F.Model | R2 | p.value | p.adjusted | sig |
|---|---|---|---|---|---|---|---|
| no_ibd vs ibd : Country | 1 | 133.144 | 0.855 | 0.003 | 0.999 | 0.999 |
Interaction check
interaction_sig <- pp_fac.cov$pairs[pp_fac.cov$p.adjusted < 0.1]
if (length(interaction_sig)>0){
for (i in 1:length(interaction_sig)){
group1 <- unlist(strsplit(interaction_sig[i],split = " vs "))[1]
group2 <- unlist(strsplit(interaction_sig[i],split = " vs "))[2]
group2 <- unlist(strsplit(group2,split = " : "))[1]
result_list <- adonis_postanalysis(x=pairwise_df,
factors = filt_colon_genus_metadata$Group,
covariate = filt_colon_genus_metadata$Country,
group1 = group1,
group2 = group2,
patients = filt_colon_genus_metadata$Patient)
print(result_list)
}
}3.3.1.0.2 Plots
PCoA custom
p <- pca_plot_custom(filt_colon_genus_tab,
filt_colon_genus_taxa,
filt_colon_genus_metadata,
show_boxplots = TRUE,
variable = "Group", size=2,
show_legend=FALSE)
# save the results
pca_plots_list[[paste(segment,level,"custom")]] <- p
# see the results
p3.3.1.1 Saving results
write.xlsx(pairwise_aitchison_raw[[paste(level, segment)]],
file = file.path(path,
paste0("beta_diversity_results_", segment,".xlsx")))3.4 Univariate Analysis
3.4.1 Main - Genus level
level="genus"# needed paths
path = "../../../results/merged_sites/main_analysis/Q3/univariate_analysis"
path_maaslin=file.path("../../../intermediate_files/maaslin/Q3",level)# variables
raw_linda_results_genus[[segment]] <- list()
linda_results_genus[[segment]] <- list()
# country and interaction problems
list_intersections <- list()
list_venns <- list()
uni_statistics <- list()
# workbook for final df
wb <- createWorkbook()
# PSC effect
psc_effect <- list()3.4.1.1 Genus level
level="genus"Aggregate taxa
genus_data <- aggregate_taxa(colon_asv_tab,
colon_taxa_tab,
taxonomic_level = level)
colon_genus_tab <- genus_data[[1]]
colon_genus_taxa_tab <- genus_data[[2]]
colon_genus_asv_taxa_tab <- create_asv_taxa_table(colon_genus_tab,
colon_genus_taxa_tab)3.4.1.1.1 ibd vs no_ibd
group <- c("no_ibd","ibd")
comparison_name <- paste0(group[1], " vs ",group[2])3.4.1.1.1.1 linda
# prepare the data
linda_data <- binomial_prep(colon_genus_tab,
colon_genus_taxa_tab,
colon_metadata,
group, usage="linDA")Removing 7 ASV(s)
filt_colon_uni_data <- linda_data[[1]]
filt_colon_uni_taxa <- linda_data[[2]]
filt_colon_uni_metadata <- linda_data[[3]]
# fit the model
linda.obj <- linda(filt_colon_uni_data,
filt_colon_uni_metadata,
formula = '~ Group * Country + (1|Patient)')0 features are filtered!
The filtered data has 337 samples and 159 features will be tested!
Imputation approach is used.
Fit linear mixed effects models ...
Completed.
linda.output <- linda.obj$output
linda.output <- linda_renaming(linda.output, group)
# save the results
group1 <- paste0(group[1], " vs ","Group",group[2])
group2 <- paste0(group[1], " , ",group[2], " - CZ vs NO")
group3 <- paste0(group[1], " vs ","Group",group[2], ":CountryNO")
for (grp in c(group1,group2,group3)){
raw_linda_results_genus[[segment]][[grp]] <-
rawlinda.df(linda.output,
grp,
filt_colon_uni_data,
filt_colon_uni_taxa)
linda_results_genus[[segment]][[grp]] <-
linda.df(linda.output,
grp,
filt_colon_uni_data,
filt_colon_uni_taxa)
}# volcano plot
volcano_1 <- volcano_plot_linda(linda.output, group1,
taxa_table = filt_colon_uni_taxa) +
ggtitle(paste(group,collapse=" vs "))
volcano_2 <- volcano_plot_linda(linda.output, group2,
taxa_table = filt_colon_uni_taxa) +
ggtitle("NO vs CZ")
volcano_3 <- volcano_plot_linda(linda.output, group3,
taxa_table = filt_colon_uni_taxa) +
ggtitle("Interaction effect")
volcano <- ggarrange(volcano_1,volcano_2,volcano_3, ncol=3)
# see the plot
volcanovolcano_linda_colon <- volcano_1 +
ggtitle("IBD vs no_IBD") +
xlab("Effect size (LinDA)") +
ylab("–log"[10]~"(q-value)")
volcano_linda_ileum3.4.1.1.1.2 MaAsLin2
volcano1 <- volcano_plot_maaslin(fit_data,filt_colon_uni_taxa) +
ggtitle(paste(group[1], "vs", group[2]))
volcano2 <- volcano_plot_maaslin(fit_data,filt_colon_uni_taxa,variable="Country") +
ggtitle("Country effect")
volcano <- ggarrange(volcano1,volcano2, ncol=2)
volcanovolcano_maaslin_colon <- volcano1 +
ggtitle("IBD vs no_IBD") +
xlab("Effect size (MaAsLin2)") +
ylab("–log"[10]~"(q-value)")
volcano_maaslin_colon3.4.1.1.1.3 Group - Intersection
intersection_results <- group_intersection(group, list_intersections, list_venns,
linda.output, fit_data,
raw_linda_results_genus,
segment = segment,
level=level)
list_intersections <- intersection_results[[1]]
list_venns <- intersection_results[[2]]
venn <- intersection_results[[3]]
# show the results
venn3.4.1.1.1.4 Interaction effect
list_interaction_significant <- country_interaction(group,
linda.output,
list_intersections,
filt_colon_uni_data,
filt_colon_uni_metadata,
segment=segment,
level=level)
# see the result
## significant interaction effect
list_interaction_significant[[1]]NULL
## results for czech cohort
list_interaction_significant[[2]][1] NA
## results for norwegian cohort
list_interaction_significant[[3]][1] NA
Removing problematic taxa
list_intersections <- removing_interaction_problems(group,
list_interaction_significant,
list_intersections,
segment=segment,
level=level)3.4.1.1.2 Basic statistics
uni_df <- merge(basic_univariate_statistics(linda_data,group),
raw_linda_results_genus[[segment]][[group1]],
by="SeqID",all=TRUE)[1] "no_ibd"
[1] "ibd"
[1] "no_ibd"
[1] "ibd"
uni_df[["final_sig"]] <- uni_df$SeqID %in% list_intersections[[paste(segment,level,comparison_name)]][["SeqID"]]
uni_statistics[[segment]][[paste(level,comparison_name)]] <- uni_df
# for comparison
new_name <- comparison_name
addWorksheet(wb, sheetName = new_name)
writeData(wb, sheet = new_name, uni_df, rowNames=FALSE)3.4.1.1.3 Visualization
Heatmap visualizing the linDA’s logFoldChange for taxa with p < 0.1.
list_heatmap <- list_intersections[grep(paste(segment,level),
names(list_intersections),value=TRUE)]
if (length((list_heatmap[[1]][[1]]))>1){
p_heatmap_linda <- heatmap_linda(list_heatmap,colon_taxa_tab)
p_heatmap_linda
}Dot heatmap
if (length((list_heatmap[[1]][[1]]))>0){
dotheatmap_linda <- dot_heatmap_linda(list_heatmap,
uni_statistics$terminal_ileum[grepl(level,names(uni_statistics$terminal_ileum))],
colon_taxa_tab)
dotheatmap_linda
}Horizontal bar plot
if (length((list_heatmap[[1]][[1]]))>0){
p_prevalence <- horizontal_barplot(wb,taxa=levels(dotheatmap_linda$data$SeqID))
p_prevalence_final <- ggarrange(p_prevalence,
ggplot() + theme_minimal(),
nrow = 2,heights = c(1,0.085))
p <- ggarrange(dotheatmap_linda + theme(legend.position = "none"),p_prevalence_final,ncol=2,widths = c(1,0.2))
p
}3.4.1.2 Saving results
# ALL DATA
saveWorkbook(wb,file.path(path,paste0("uni_analysis_wb_",segment,".xlsx")),
overwrite = TRUE)
# SIGNIFICANT taxa
write.xlsx(list_intersections[grepl(segment,names(list_intersections))] %>%
`names<-`(gsub(segment, "", names(
list_intersections[grepl(segment,names(list_intersections))]))),
file.path(path,paste0("significant_taxa_",segment,".xlsx")))3.5 Results overview
3.5.0.1 Alpha diversity
knitr::kable(pc_observed[[segment]],
digits = 3,
caption = "Results of linear model testing ASV Richness")| Estimate | Std..Error | df | t.value | Pr…t.. | p.adj | sig | |
|---|---|---|---|---|---|---|---|
| no_ibd vs Groupibd | 0.195 | 13.828 | 330.245 | 0.014 | 0.989 | 0.989 | |
| no_ibd vs ibd - CZ vs NO | -0.556 | 18.199 | 238.733 | -0.031 | 0.976 | 0.976 | |
| no_ibd vs Groupibd:CountryNO | -1.880 | 18.971 | 263.174 | -0.099 | 0.921 | 0.921 |
knitr::kable(pc_shannon[[segment]],
digits = 3,
caption = "Results of linear model testing Shannon index")| Estimate | Std..Error | df | t.value | Pr…t.. | p.adj | sig | |
|---|---|---|---|---|---|---|---|
| no_ibd vs Groupibd | -0.038 | 0.204 | 332.621 | -0.185 | 0.854 | 0.854 | |
| no_ibd vs ibd - CZ vs NO | -0.139 | 0.262 | 239.592 | -0.529 | 0.597 | 0.597 | |
| no_ibd vs Groupibd:CountryNO | 0.101 | 0.275 | 262.547 | 0.367 | 0.714 | 0.714 |
knitr::kable(pc_simpson[[segment]],
digits = 3,
caption = "Results of linear model testing Simpson index")| Estimate | Std..Error | df | t.value | Pr…t.. | p.adj | sig | |
|---|---|---|---|---|---|---|---|
| no_ibd vs Groupibd | -0.015 | 0.033 | 332.974 | -0.447 | 0.656 | 0.656 | |
| no_ibd vs ibd - CZ vs NO | -0.036 | 0.042 | 240.107 | -0.860 | 0.391 | 0.391 | |
| no_ibd vs Groupibd:CountryNO | 0.035 | 0.044 | 261.852 | 0.797 | 0.426 | 0.426 |
knitr::kable(pc_pielou[[segment]],
digits = 3,
caption = "Results of linear model testing Pielou index")Table: Results of linear model testing Pielou index
Plots
alpha_div_plots[[paste(segment,"Country")]]3.5.0.2 Beta diversity
Main results
knitr::kable(pairwise_aitchison_raw[[paste("genus", segment)]],
digits = 3,
caption = "Results of PERMANOVA - robust aitchison distance")| pairs | Df | SumsOfSqs | F.Model | R2 | p.value | p.adjusted | sig |
|---|---|---|---|---|---|---|---|
| no_ibd vs ibd | 1 | 170.678 | 1.097 | 0.003 | 1.000 | 1.000 | |
| no_ibd vs ibd , Country | 1 | 396.369 | 2.547 | 0.008 | 0.169 | 0.169 | |
| no_ibd vs ibd : Country | 1 | 133.144 | 0.855 | 0.003 | 0.999 | 0.999 |
PCA
pca_plots_list[[paste(segment,"genus custom")]]3.5.0.3 Univariate analysis
Number of significant taxa
knitr::kable(as.data.frame(lapply(list_intersections,nrow)) %>% t() %>%
`colnames<-`("Count") %>%
`rownames<-`(c(names(list_intersections))),caption="Number of significant taxa")| Count | |
|---|---|
| colon genus no_ibd vs ibd | 0 |
4 Paper-ready visualizations
Alpha diversity
p_A <- alpha_div_plots$`terminal_ileum Country` +
ggtitle("Terminal ileum")+
theme(plot.title = element_text(hjust=0.5,face = "bold",size = 15))
p_B <- alpha_div_plots$`colon Country` +
ggtitle("Colon") +
theme(plot.title = element_text(hjust=0.5,face = "bold",size = 15))
Q3_alpha <- ggarrange(p_A,ggplot() + theme_minimal(),p_B,nrow=1, ncol=3,
widths = c(1,0.1,1))
Q3_alphaBeta diversity
pca_ti <- pca_plots_list$`terminal_ileum genus custom`
pca_colon <- pca_plots_list$`colon genus custom`
genus_Q3_beta <- ggarrange(pca_ti,
ggplot() + theme_minimal(),
pca_colon,ncol=3,
widths = c(1,0.1,1))
genus_Q3_betasave(Q3_alpha,genus_Q3_beta,file="../../../results/merged_sites/main_analysis/Q3/figures.RData")Alpha + Beta diversity
alpha_beta <- ggarrange(Q3_alpha,genus_Q3_beta,
ncol = 1,nrow=2,labels = c("A","B"))
alpha_beta4.1 Supplement Figure
alpha_beta <- ggarrange(alpha_beta,ggplot() + theme_minimal(),ncol=2,
widths = c(1,0.15))
alpha_betaDAA
if (exists("dot_heatmap_ileum")){
p_ileum <- dot_heatmap_ileum +
ggtitle("Terminal ileum") +
theme(plot.title = element_text(hjust=0.5,face = "bold",size = 15),
legend.position = "none")
}
if (exists("dot_heatmap_colon")){
p_colon <- dot_heatmap_colon +
ggtitle("Colon") +
theme(plot.title = element_text(hjust=0.5,face = "bold",size = 15),
legend.position = "none")
}
if (exists("p_ileum") | exists("p_colon")){
heatmap_plot <- ggarrange(p_ileum,
p_colon,
ncol = 2,labels=c("A","B"))
heatmap_plot
}4.2 Supplement Figure - Akkermansia
volcano_figure <- ggarrange(
volcano_linda_ileum + ggtitle("Terminal ileum"),
volcano_linda_colon + ggtitle("Colon"),
volcano_maaslin_ileum + ggtitle(""),
volcano_maaslin_colon + ggtitle(""),
labels = c("A","","B",""))
volcano_figure5 Session info
sessionInfo()R version 4.3.1 (2023-06-16 ucrt)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 11 x64 (build 26100)
Matrix products: default
locale:
[1] LC_COLLATE=Czech_Czechia.utf8 LC_CTYPE=Czech_Czechia.utf8
[3] LC_MONETARY=Czech_Czechia.utf8 LC_NUMERIC=C
[5] LC_TIME=Czech_Czechia.utf8
time zone: Europe/Prague
tzcode source: internal
attached base packages:
[1] parallel stats graphics grDevices utils datasets methods
[8] base
other attached packages:
[1] mice_3.17.0 picante_1.8.2 ape_5.8-1
[4] lubridate_1.9.3 forcats_1.0.0 stringr_1.5.1
[7] tidyverse_2.0.0 kableExtra_1.4.0 tidyr_1.3.1
[10] gbm_2.2.2 doParallel_1.0.17 iterators_1.0.14
[13] foreach_1.5.2 ranger_0.17.0 ggvenn_0.1.15
[16] Maaslin2_1.16.0 purrr_1.0.2 pROC_1.18.5
[19] glmnet_4.1-8 MicrobiomeStat_1.2 caret_6.0-94
[22] openxlsx_4.2.8 magrittr_2.0.3 emmeans_1.11.2-8
[25] lmerTest_3.1-3 robustlmm_3.3-1 lme4_1.1-37
[28] Matrix_1.6-5 mgcv_1.9-1 nlme_3.1-164
[31] pheatmap_1.0.13 reshape2_1.4.4 vegan_2.6-10
[34] lattice_0.22-6 permute_0.9-8 ggplotify_0.1.2
[37] ggrepel_0.9.6 ggpubr_0.6.1 MicrobiotaProcess_1.14.1
[40] phyloseq_1.46.0 ggplot2_3.5.2 tibble_3.2.1
[43] dplyr_1.1.4 cowplot_1.2.0 readr_2.1.5
[46] igraph_2.1.4 ccrepe_1.38.1 data.table_1.15.4
loaded via a namespace (and not attached):
[1] fs_1.6.5 matrixStats_1.5.0
[3] bitops_1.0-9 RColorBrewer_1.1-3
[5] numDeriv_2016.8-1.1 tools_4.3.1
[7] backports_1.4.1 R6_2.6.1
[9] lazyeval_0.2.2 jomo_2.7-6
[11] rhdf5filters_1.14.1 withr_3.0.2
[13] gridExtra_2.3 cli_3.6.2
[15] Biobase_2.62.0 logging_0.10-108
[17] biglm_0.9-3 sandwich_3.1-1
[19] labeling_0.4.3 mvtnorm_1.3-3
[21] robustbase_0.99-4-1 pbapply_1.7-4
[23] systemfonts_1.2.2 yulab.utils_0.2.1
[25] svglite_2.1.3 parallelly_1.38.0
[27] rstudioapi_0.17.1 generics_0.1.4
[29] gridGraphics_0.5-1 shape_1.4.6.1
[31] car_3.1-3 zip_2.3.2
[33] biomformat_1.30.0 S4Vectors_0.40.2
[35] abind_1.4-8 infotheo_1.2.0.1
[37] lifecycle_1.0.4 multcomp_1.4-28
[39] yaml_2.3.10 carData_3.0-5
[41] SummarizedExperiment_1.32.0 Rtsne_0.17
[43] rhdf5_2.46.1 recipes_1.3.1
[45] SparseArray_1.2.4 grid_4.3.1
[47] mitml_0.4-5 crayon_1.5.3
[49] pillar_1.11.0 knitr_1.50
[51] optparse_1.7.5 GenomicRanges_1.54.1
[53] statip_0.2.3 boot_1.3-32
[55] estimability_1.5.1 future.apply_1.11.3
[57] codetools_0.2-20 pan_1.9
[59] glue_1.7.0 ggfun_0.2.0
[61] vctrs_0.6.5 treeio_1.26.0
[63] Rdpack_2.6.4 gtable_0.3.6
[65] gower_1.0.1 xfun_0.52
[67] rbibutils_2.3 S4Arrays_1.2.1
[69] prodlim_2024.06.25 libcoin_1.0-10
[71] coda_0.19-4.1 reformulas_0.4.1
[73] pcaPP_2.0-5 modeest_2.4.0
[75] survival_3.5-8 timeDate_4041.110
[77] hardhat_1.4.2 lava_1.8.1
[79] statmod_1.5.0 TH.data_1.1-4
[81] ipred_0.9-15 ggtree_3.10.1
[83] GenomeInfoDb_1.38.8 fBasics_4041.97
[85] rpart_4.1.23 colorspace_2.1-1
[87] BiocGenerics_0.48.1 DBI_1.2.3
[89] nnet_7.3-19 ade4_1.7-23
[91] tidyselect_1.2.1 timeSeries_4041.111
[93] compiler_4.3.1 microbiome_1.24.0
[95] xml2_1.3.8 DelayedArray_0.28.0
[97] scales_1.4.0 DEoptimR_1.1-4
[99] spatial_7.3-18 rappdirs_0.3.3
[101] digest_0.6.35 minqa_1.2.8
[103] rmarkdown_2.29 XVector_0.42.0
[105] htmltools_0.5.8.1 pkgconfig_2.0.3
[107] MatrixGenerics_1.14.0 stabledist_0.7-2
[109] fastmap_1.2.0 rlang_1.1.3
[111] htmlwidgets_1.6.4 farver_2.1.2
[113] zoo_1.8-13 jsonlite_2.0.0
[115] ModelMetrics_1.2.2.2 RCurl_1.98-1.17
[117] modeltools_0.2-24 Formula_1.2-5
[119] GenomeInfoDbData_1.2.11 patchwork_1.3.2
[121] Rhdf5lib_1.24.2 Rcpp_1.0.12
[123] ggnewscale_0.5.2 fastGHQuad_1.0.1
[125] stringi_1.8.3 ggstar_1.0.4
[127] stable_1.1.6 zlibbioc_1.48.2
[129] MASS_7.3-60.0.1 plyr_1.8.9
[131] listenv_0.9.1 Biostrings_2.70.3
[133] splines_4.3.1 hash_2.2.6.3
[135] multtest_2.58.0 hms_1.1.3
[137] ggtreeExtra_1.12.0 ggsignif_0.6.4
[139] stats4_4.3.1 rmutil_1.1.10
[141] evaluate_1.0.5 nloptr_2.2.1
[143] tzdb_0.5.0 getopt_1.20.4
[145] future_1.34.0 clue_0.3-66
[147] coin_1.4-3 broom_1.0.9
[149] xtable_1.8-4 tidytree_0.4.6
[151] rstatix_0.7.2 viridisLite_0.4.2
[153] class_7.3-22 aplot_0.2.8
[155] IRanges_2.36.0 cluster_2.1.8.1
[157] timechange_0.3.0 globals_0.18.0