source("custom_functions.R")Mucosal microbiota alterations in primary sclerosis cholangitis persist after liver transplantation and are associated with clinical features independently of geography
MDI calculation
1 Introduction
Microbial Dysbiosis Index (MDI) was defined as the ratio of the total abundance of taxa increased in PSC to the abundance of taxa decreased in PSC. Since we used clr-transformed data, we calculated this index as the difference of both values separately for ileum and colon samples at the genus level.
Importing libraries and custom functions built for this analysis
2 Data Import
Importing ASV, taxa and metadata tables for both Czech and Norway samples.
Czech
path = "../../data/analysis_ready_data/ikem/"
asv_tab_ikem <- as.data.frame(fread(file.path(path,"asv_table_ikem.csv"),
check.names = FALSE))
taxa_tab_ikem <- as.data.frame(fread(file.path(path,"taxa_table_ikem.csv"),
check.names = FALSE))
metadata_ikem <- as.data.frame(fread(file.path(path,"metadata_ikem.csv"),
check.names = FALSE))Norway
path = "../../data/analysis_ready_data/norway/"
asv_tab_norway <- as.data.frame(fread(file.path(path,"asv_table_norway.csv"),
check.names = FALSE))
taxa_tab_norway <- as.data.frame(fread(file.path(path,"taxa_table_norway.csv"),
check.names = FALSE))
metadata_norway <- as.data.frame(fread(file.path(path,"metadata_norway.csv"),
check.names = FALSE))2.1 Merging
Merging two countries based on the different matrices - Ileum, Colon.
Terminal ileum
ileum_data <- merging_data(asv_tab_1=asv_tab_ikem,
asv_tab_2=asv_tab_norway,
taxa_tab_1=taxa_tab_ikem,
taxa_tab_2=taxa_tab_norway,
metadata_1=metadata_ikem,
metadata_2=metadata_norway,
segment="TI",Q="clinical")Removing 1498 ASV(s)
Removing 1834 ASV(s)
Merging at ASV level
Finding inconsistencies in taxonomy, trying to keep the ones that have better taxonomy assignment
ileum_asv_tab <- ileum_data[[1]]
ileum_taxa_tab <- ileum_data[[2]]
ileum_metadata <- ileum_data[[3]]Colon
colon_data <- merging_data(asv_tab_1=asv_tab_ikem,
asv_tab_2=asv_tab_norway,
taxa_tab_1=taxa_tab_ikem,
taxa_tab_2=taxa_tab_norway,
metadata_1=metadata_ikem,
metadata_2=metadata_norway,
segment="colon",Q="clinical")Removing 739 ASV(s)
Removing 266 ASV(s)
Merging at ASV level
Finding inconsistencies in taxonomy, trying to keep the ones that have better taxonomy assignment
colon_asv_tab <- colon_data[[1]]
colon_taxa_tab <- colon_data[[2]]
colon_metadata <- colon_data[[3]]3 Data Analysis - Terminal ileum
segment="terminal_ileum"3.1 Filtering
Library size
read_counts(ileum_asv_tab, line = c(5000,10000))Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
3.1.1 Sequencing depth
data_filt <- seq_depth_filtering(ileum_asv_tab,
ileum_taxa_tab,
ileum_metadata,
seq_depth_threshold = 10000)Removing 131 ASV(s)
filt_ileum_asv_tab <- data_filt[[1]]; alpha_ileum_asv_tab <- filt_ileum_asv_tab
filt_ileum_taxa_tab <- data_filt[[2]]; alpha_ileum_taxa_tab <- filt_ileum_taxa_tab
filt_ileum_metadata <- data_filt[[3]]; alpha_ileum_metadata <- filt_ileum_metadata
seq_step <- dim(filt_ileum_asv_tab)[1]Library size
read_counts(filt_ileum_asv_tab,line = c(5000,10000))Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
3.1.2 NearZeroVar
data_filt <- nearzerovar_filtering(filt_ileum_asv_tab,
filt_ileum_taxa_tab,
filt_ileum_metadata)
filt_ileum_asv_tab <- data_filt[[1]]
filt_ileum_taxa_tab <- data_filt[[2]]
nearzero_step <- dim(filt_ileum_asv_tab)[1]Library size
read_counts(filt_ileum_asv_tab,line = c(5000,10000))Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
3.1.3 Final Counts
final_counts_filtering(ileum_asv_tab,
filt_ileum_asv_tab,
filt_ileum_metadata,
seq_step, 0, nearzero_step) %>% `colnames<-`("Count") Count
Raw data: ASVs 5132
Raw data: Samples 294
Sequencing depth filt: ASVs 5001
Prevalence filt: ASVs 0
NearZeroVar filt: ASVs 603
Filt data: ASVs 603
Filt data: Samples 281
Filt data: Patients 281
Filt data: Patients.1 0
Filtered samples 13
Matrices TI
healthy 73
non-rPSC 105
rPSC 33
pre_ltx 70
post_ltx 0
ETOH 0
3.2 Important taxa
3.2.0.1 ASV level
psc_asv <- read.xlsx("../results/Q1/univariate_analysis/supplements_psc_effect_terminal_ileum.xlsx",
sheet = "terminal_ileum ASV")
psc_increased_asv <- psc_asv$SeqID[psc_asv$log2FoldChange>0]
psc_decreased_asv <- psc_asv$SeqID[psc_asv$log2FoldChange<0]3.2.0.2 Genus level
psc_genus <- read.xlsx("../results/Q1/univariate_analysis/supplements_psc_effect_terminal_ileum.xlsx",
sheet = "terminal_ileum genus")
psc_increased_genus <- psc_genus$SeqID[psc_genus$log2FoldChange>0]
psc_decreased_genus <- psc_genus$SeqID[psc_genus$log2FoldChange<0]3.3 Relative abundances
3.3.1 Unfiltered data
3.3.1.1 ASV level
Calculation
dys_unfiltered_asv <- dysbiosis_index_calculation(
ileum_asv_tab,
ileum_metadata,
psc_increased_asv,
psc_decreased_asv,
"dys_unfiltered_asv")3.3.1.2 Genus level
Aggregation
genus_data <- aggregate_taxa(ileum_asv_tab,ileum_taxa_tab,
taxonomic_level="Genus",names=TRUE)
ileum_genus_tab <- genus_data[[1]]
ileum_genus_taxa <- genus_data[[2]]Calculation
dys_unfiltered_genus <- dysbiosis_index_calculation(
ileum_genus_tab,
ileum_metadata,
psc_increased_genus,
psc_decreased_genus,
"dys_unfiltered_genus")3.3.2 Filtered data
3.3.2.1 ASV level
Calculation
dys_filtered_asv <- dysbiosis_index_calculation(
filt_ileum_asv_tab,
filt_ileum_metadata,
psc_increased_asv,
psc_decreased_asv,
"dys_filtered_asv")3.3.2.2 Genus level
Aggregation
genus_data <- aggregate_taxa(ileum_asv_tab,ileum_taxa_tab,
taxonomic_level="Genus",names=TRUE)
filt_data <- filtering_steps(genus_data[[1]],genus_data[[2]],ileum_metadata,
seq_depth_threshold=10000)Removing 5 ASV(s)
filt_ileum_genus_tab <- filt_data[[1]]
filt_ileum_genus_taxa <- filt_data[[2]]
filt_ileum_metadata <- filt_data[[3]]Calculation
dys_filtered_genus <- dysbiosis_index_calculation(
ileum_genus_tab,
ileum_metadata,
psc_increased_genus,
psc_decreased_genus,
"dys_filtered_genus")3.3.3 Scatterplot
dysbiosis_data <- dys_unfiltered_asv %>% full_join(dys_unfiltered_genus, by=c("SampleID","PatientID"))
dysbiosis_data <- dysbiosis_data %>% full_join(dys_filtered_asv, by=c("SampleID","PatientID"))
dysbiosis_data <- dysbiosis_data %>% full_join(dys_filtered_genus, by=c("SampleID","PatientID"))dysbiosis_without_na <- drop_na(dysbiosis_data)
dysbiosis_corr <- cor.table(dysbiosis_without_na[,3:6], cor.method="spearman")[[1]]
dysbiosis_corr dys_unfiltered_asv dys_unfiltered_genus dys_filtered_asv
dys_unfiltered_asv 1.0000000 0.5086742 0.9998367
dys_unfiltered_genus 0.5086742 1.0000000 0.5081042
dys_filtered_asv 0.9998367 0.5081042 1.0000000
dys_filtered_genus 0.5086742 1.0000000 0.5081042
dys_filtered_genus
dys_unfiltered_asv 0.5086742
dys_unfiltered_genus 1.0000000
dys_filtered_asv 0.5081042
dys_filtered_genus 1.0000000
pairs(dysbiosis_without_na[,3:6])ileum_metadata_final <- ileum_metadata %>% full_join(dysbiosis_data, by=c("SampleID"))ileum_metadata_final_melted <- melt(ileum_metadata_final)Using SampleID, Patient, Group, Matrix, Country, PatientID as id variables
p <- ggplot(ileum_metadata_final_melted) +
geom_boxplot(aes(x=Group, y=value),outliers = FALSE) +
geom_jitter(width = 0.2,height = 0,aes(x=Group, y=value, color=Group),size=2) +
facet_wrap(~variable, ncol = 2,scales = "free") +
theme_bw() +
scale_fill_manual(values=c("#309f87","#F08080","#f9c675","#A00000")) +
scale_color_manual(values=c("#309f87","#F08080","#f9c675","#A00000"))
p 3.4 CLR-transformed
3.4.1 Unfiltered data
3.4.1.1 ASV level
Calculation
dys_unfiltered_asv <- dysbiosis_index_calculation_clr(ileum_asv_tab,
ileum_metadata,
psc_increased_asv,
psc_decreased_asv,
"dys_unfiltered_asv")3.4.1.2 Genus level
Aggregation
genus_data <- aggregate_taxa(ileum_asv_tab,ileum_taxa_tab,
taxonomic_level="Genus",names=TRUE)
ileum_genus_tab <- genus_data[[1]]
ileum_genus_taxa <- genus_data[[2]]Calculation
dys_unfiltered_genus <- dysbiosis_index_calculation_clr(ileum_genus_tab,
ileum_metadata,
psc_increased_genus,
psc_decreased_genus,
"dys_unfiltered_genus")3.4.2 Filtered data
3.4.2.1 ASV level
Calculation
dys_filtered_asv <- dysbiosis_index_calculation_clr(filt_ileum_asv_tab,
filt_ileum_metadata,
psc_increased_asv,
psc_decreased_asv,
"dys_filtered_asv")3.4.2.2 Genus level
Aggregation
genus_data <- aggregate_taxa(ileum_asv_tab,ileum_taxa_tab,
taxonomic_level="Genus",names=TRUE)
filt_data <- filtering_steps(genus_data[[1]],genus_data[[2]],ileum_metadata,
seq_depth_threshold=10000)Removing 5 ASV(s)
filt_ileum_genus_tab <- filt_data[[1]]
filt_ileum_genus_taxa <- filt_data[[2]]
filt_ileum_metadata <- filt_data[[3]]Calculation
dys_filtered_genus <- dysbiosis_index_calculation_clr(filt_ileum_genus_tab,
filt_ileum_metadata,
psc_increased_genus,
psc_decreased_genus,
"dys_filtered_genus")3.4.3 Scatterplot
dysbiosis_data <- dys_unfiltered_asv %>% full_join(dys_unfiltered_genus, by=c("SampleID","PatientID"))
dysbiosis_data <- dysbiosis_data %>% full_join(dys_filtered_asv, by=c("SampleID","PatientID"))
dysbiosis_data <- dysbiosis_data %>% full_join(dys_filtered_genus, by=c("SampleID","PatientID"))dysbiosis_without_na <- drop_na(dysbiosis_data)
dysbiosis_corr <- cor.table(dysbiosis_without_na[,3:6], cor.method="spearman")[[1]]
dysbiosis_corr dys_unfiltered_asv dys_unfiltered_genus dys_filtered_asv
dys_unfiltered_asv 1.0000000 0.8067886 0.9963385
dys_unfiltered_genus 0.8067886 1.0000000 0.7789149
dys_filtered_asv 0.9963385 0.7789149 1.0000000
dys_filtered_genus 0.8088973 0.9938647 0.7871783
dys_filtered_genus
dys_unfiltered_asv 0.8088973
dys_unfiltered_genus 0.9938647
dys_filtered_asv 0.7871783
dys_filtered_genus 1.0000000
pairs(dysbiosis_without_na[,3:6])ileum_metadata_final <- ileum_metadata %>% full_join(dysbiosis_data, by=c("SampleID"))ileum_metadata_final_melted <- melt(ileum_metadata_final)Using SampleID, Patient, Group, Matrix, Country, PatientID as id variables
p <- ggplot(ileum_metadata_final_melted) +
geom_boxplot(aes(x=Group, y=value),outliers = FALSE) +
geom_jitter(width = 0.2,height = 0,aes(x=Group, y=value, color=Group),size=2) +
facet_wrap(~variable, ncol = 2,scales = "free") +
theme_bw() +
scale_fill_manual(values=c("#309f87","#F08080","#f9c675","#A00000")) +
scale_color_manual(values=c("#309f87","#F08080","#f9c675","#A00000"))
p 4 Data Analysis - Colon
segment="colon"4.1 Filtering
Rules: - sequencing depth > 10000
- nearZeroVar() with default settings
Library size
read_counts(colon_asv_tab, line = c(5000,10000))Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
4.1.1 Sequencing depth
data_filt <- seq_depth_filtering(colon_asv_tab,
colon_taxa_tab,
colon_metadata,
seq_depth_threshold = 10000)Removing 75 ASV(s)
filt_colon_asv_tab <- data_filt[[1]]; alpha_colon_asv_tab <- filt_colon_asv_tab
filt_colon_taxa_tab <- data_filt[[2]]; alpha_colon_taxa_tab <- filt_colon_taxa_tab
filt_colon_metadata <- data_filt[[3]]; alpha_colon_metadata <- filt_colon_metadata
seq_step <- dim(filt_colon_asv_tab)[1]Library size
read_counts(filt_colon_asv_tab,line = c(10000))Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
4.1.2 NearZeroVar
data_filt <- nearzerovar_filtering(filt_colon_asv_tab,
filt_colon_taxa_tab,
filt_colon_metadata)
filt_colon_asv_tab <- data_filt[[1]]
filt_colon_taxa_tab <- data_filt[[2]]
nearzero_step <- dim(filt_colon_asv_tab)[1]Library size
read_counts(filt_colon_asv_tab,line = c(5000,10000))Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
Check zero depth
data_filt <- check_zero_depth(filt_colon_asv_tab,
filt_colon_taxa_tab,
filt_colon_metadata)
filt_colon_asv_tab <- data_filt[[1]];
filt_colon_taxa_tab <- data_filt[[2]];
filt_colon_metadata <- data_filt[[3]]; Library size
read_counts(filt_colon_asv_tab,line = c(5000,10000))Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
4.1.3 Final Counts
final_counts_filtering(colon_asv_tab,
filt_colon_asv_tab,
filt_colon_metadata,
seq_step, 0, nearzero_step) V1
Raw data: ASVs 6936
Raw data: Samples 789
Sequencing depth filt: ASVs 6861
Prevalence filt: ASVs 0
NearZeroVar filt: ASVs 449
Filt data: ASVs 449
Filt data: Samples 761
Filt data: Patients 355
Filt data: Patients.1 0
Filtered samples 28
Matrices Cecum;Rectum;CD;CA;SI
healthy 161
non-rPSC 263
rPSC 87
pre_ltx 250
post_ltx 0
ETOH 0
4.2 Important taxa
4.2.0.1 ASV level
psc_asv <- read.xlsx("../results/Q1/univariate_analysis/supplements_psc_effect_colon.xlsx",
sheet = "colon ASV")
psc_increased_asv <- psc_asv$SeqID[psc_asv$log2FoldChange>0]
psc_decreased_asv <- psc_asv$SeqID[psc_asv$log2FoldChange<0]4.2.0.2 Genus level
psc_genus <- read.xlsx("../results/Q1/univariate_analysis/supplements_psc_effect_colon.xlsx",
sheet = "colon genus")
psc_increased_genus <- psc_genus$SeqID[psc_genus$log2FoldChange>0]
psc_decreased_genus <- psc_genus$SeqID[psc_genus$log2FoldChange<0]4.3 Relative abundances
4.3.1 Unfiltered data
4.3.1.1 ASV level
Calculation
dys_unfiltered_asv <- dysbiosis_index_calculation(colon_asv_tab,
colon_metadata,
psc_increased_asv,
psc_decreased_asv,
"dys_unfiltered_asv")4.3.1.2 Genus level
Aggregation
genus_data <- aggregate_taxa(colon_asv_tab,colon_taxa_tab,
taxonomic_level="Genus",names=TRUE)
colon_genus_tab <- genus_data[[1]]
colon_genus_taxa <- genus_data[[2]]Calculation
dys_unfiltered_genus <- dysbiosis_index_calculation(colon_genus_tab,
colon_metadata,
psc_increased_genus,
psc_decreased_genus,
"dys_unfiltered_genus")4.3.2 Filtered data
4.3.2.1 ASV level
Calculation
dys_filtered_asv <- dysbiosis_index_calculation(filt_colon_asv_tab,
filt_colon_metadata,
psc_increased_asv,
psc_decreased_asv,
"dys_filtered_asv")4.3.2.2 Genus level
Aggregation
genus_data <- aggregate_taxa(colon_asv_tab,colon_taxa_tab,
taxonomic_level="Genus",names=TRUE)
filt_data <- filtering_steps(genus_data[[1]],genus_data[[2]],colon_metadata,
seq_depth_threshold=10000)Removing 10 ASV(s)
filt_colon_genus_tab <- filt_data[[1]]
filt_colon_genus_taxa <- filt_data[[2]]
filt_colon_metadata <- filt_data[[3]]Calculation
dys_filtered_genus <- dysbiosis_index_calculation(colon_genus_tab,
colon_metadata,
psc_increased_genus,
psc_decreased_genus,
"dys_filtered_genus")4.3.3 Scatterplot
dysbiosis_data <- dys_unfiltered_asv %>% full_join(dys_unfiltered_genus, by=c("SampleID","PatientID"))
dysbiosis_data <- dysbiosis_data %>% full_join(dys_filtered_asv, by=c("SampleID","PatientID"))
dysbiosis_data <- dysbiosis_data %>% full_join(dys_filtered_genus, by=c("SampleID","PatientID"))dysbiosis_without_na <- drop_na(dysbiosis_data)
dysbiosis_corr <- cor.table(dysbiosis_without_na[,3:6], cor.method="spearman")[[1]]
dysbiosis_corr dys_unfiltered_asv dys_unfiltered_genus dys_filtered_asv
dys_unfiltered_asv 1.0000000 0.6341152 0.9999958
dys_unfiltered_genus 0.6341152 1.0000000 0.6340594
dys_filtered_asv 0.9999958 0.6340594 1.0000000
dys_filtered_genus 0.6341152 1.0000000 0.6340594
dys_filtered_genus
dys_unfiltered_asv 0.6341152
dys_unfiltered_genus 1.0000000
dys_filtered_asv 0.6340594
dys_filtered_genus 1.0000000
pairs(dysbiosis_without_na[,3:6])colon_metadata_final <- colon_metadata %>% full_join(dysbiosis_data, by=c("SampleID"))colon_metadata_final_melted <- melt(colon_metadata_final)Using SampleID, Patient, Group, Matrix, Country, PatientID as id variables
p <- ggplot(colon_metadata_final_melted) +
geom_boxplot(aes(x=Group, y=value),outliers = FALSE) +
geom_jitter(width = 0.2,height = 0,aes(x=Group, y=value, color=Group),size=2) +
facet_wrap(~variable, ncol = 2,scales = "free") +
theme_bw() +
scale_fill_manual(values=c("#309f87","#F08080","#f9c675","#A00000")) +
scale_color_manual(values=c("#309f87","#F08080","#f9c675","#A00000"))
p 4.4 CLR-transformed
4.4.1 Unfiltered data
4.4.1.1 ASV level
dys_unfiltered_asv
Calculation
dys_unfiltered_asv <- dysbiosis_index_calculation_clr(colon_asv_tab,
colon_metadata,
psc_increased_asv,
psc_decreased_asv,
"dys_unfiltered_asv")4.4.1.2 Genus level
dys_unfiltered_genus
Aggregation
genus_data <- aggregate_taxa(colon_asv_tab,colon_taxa_tab,
taxonomic_level="Genus",names=TRUE)
colon_genus_tab <- genus_data[[1]]
colon_genus_taxa <- genus_data[[2]]Calculation
dys_unfiltered_genus <- dysbiosis_index_calculation_clr(colon_genus_tab,
colon_metadata,
psc_increased_genus,
psc_decreased_genus,
"dys_unfiltered_genus")4.4.2 Filtered data
4.4.2.1 ASV level
dys_filtered_asv
Calculation
dys_filtered_asv <- dysbiosis_index_calculation_clr(filt_colon_asv_tab,
filt_colon_metadata,
psc_increased_asv,
psc_decreased_asv,
"dys_filtered_asv")4.4.2.2 Genus level
dys_filtered_genus
Aggregation
genus_data <- aggregate_taxa(colon_asv_tab,colon_taxa_tab,
taxonomic_level="Genus",names=TRUE)
filt_data <- filtering_steps(genus_data[[1]],genus_data[[2]],colon_metadata,
seq_depth_threshold=10000)Removing 10 ASV(s)
filt_colon_genus_tab <- filt_data[[1]]
filt_colon_genus_taxa <- filt_data[[2]]
filt_colon_metadata <- filt_data[[3]]Calculation
dys_filtered_genus <- dysbiosis_index_calculation_clr(filt_colon_genus_tab,
filt_colon_metadata,
psc_increased_genus,
psc_decreased_genus,
"dys_filtered_genus")4.4.3 Scatterplot
dysbiosis_data <- dys_unfiltered_asv %>% full_join(dys_unfiltered_genus, by=c("SampleID","PatientID"))
dysbiosis_data <- dysbiosis_data %>% full_join(dys_filtered_asv, by=c("SampleID","PatientID"))
dysbiosis_data <- dysbiosis_data %>% full_join(dys_filtered_genus, by=c("SampleID","PatientID"))dysbiosis_without_na <- drop_na(dysbiosis_data)
dysbiosis_corr <- cor.table(dysbiosis_without_na[,3:6], cor.method="spearman")[[1]]
dysbiosis_corr dys_unfiltered_asv dys_unfiltered_genus dys_filtered_asv
dys_unfiltered_asv 1.0000000 0.8469789 0.9955350
dys_unfiltered_genus 0.8469789 1.0000000 0.8231594
dys_filtered_asv 0.9955350 0.8231594 1.0000000
dys_filtered_genus 0.8428275 0.9961173 0.8249441
dys_filtered_genus
dys_unfiltered_asv 0.8428275
dys_unfiltered_genus 0.9961173
dys_filtered_asv 0.8249441
dys_filtered_genus 1.0000000
pairs(dysbiosis_without_na[,3:6])colon_metadata_final <- colon_metadata %>% full_join(dysbiosis_data, by=c("SampleID"))colon_metadata_final_melted <- melt(colon_metadata_final)Using SampleID, Patient, Group, Matrix, Country, PatientID as id variables
p <- ggplot(colon_metadata_final_melted) +
geom_boxplot(aes(x=Group, y=value),outliers = FALSE) +
geom_jitter(width = 0.2,height = 0,aes(x=Group, y=value, color=Group),size=2) +
facet_wrap(~variable, ncol = 2,scales = "free") +
theme_bw() +
scale_fill_manual(values=c("#309f87","#F08080","#f9c675","#A00000")) +
scale_color_manual(values=c("#309f87","#F08080","#f9c675","#A00000"))
p metadata_dysbiosis_final <- rbind(colon_metadata_final,ileum_metadata_final)write.csv(metadata_dysbiosis_final,
"../../data/clinical_data/dysbiosis_metadata.csv",row.names = FALSE)