Mucosal microbiota alterations in primary sclerosis cholangitis persist after liver transplantation and are associated with clinical features independently of geography

MDI calculation


1 Introduction

Microbial Dysbiosis Index (MDI) was defined as the ratio of the total abundance of taxa increased in PSC to the abundance of taxa decreased in PSC. Since we used clr-transformed data, we calculated this index as the difference of both values separately for ileum and colon samples at the genus level.

Importing libraries and custom functions built for this analysis

source("custom_functions.R")

2 Data Import

Importing ASV, taxa and metadata tables for both Czech and Norway samples.

Czech

path = "../../data/analysis_ready_data/ikem/"
asv_tab_ikem <- as.data.frame(fread(file.path(path,"asv_table_ikem.csv"),
                                    check.names = FALSE))
taxa_tab_ikem <- as.data.frame(fread(file.path(path,"taxa_table_ikem.csv"),
                                     check.names = FALSE))
metadata_ikem <- as.data.frame(fread(file.path(path,"metadata_ikem.csv"),
                                     check.names = FALSE))

Norway

path = "../../data/analysis_ready_data/norway/"
asv_tab_norway <- as.data.frame(fread(file.path(path,"asv_table_norway.csv"),
                                    check.names = FALSE))
taxa_tab_norway <- as.data.frame(fread(file.path(path,"taxa_table_norway.csv"),
                                    check.names = FALSE))
metadata_norway <- as.data.frame(fread(file.path(path,"metadata_norway.csv"),
                                    check.names = FALSE))

2.1 Merging

Merging two countries based on the different matrices - Ileum, Colon.

Terminal ileum

ileum_data <- merging_data(asv_tab_1=asv_tab_ikem,
                           asv_tab_2=asv_tab_norway,
                           taxa_tab_1=taxa_tab_ikem,
                           taxa_tab_2=taxa_tab_norway,
                           metadata_1=metadata_ikem,
                           metadata_2=metadata_norway,
                           segment="TI",Q="clinical")
Removing 1498 ASV(s)
Removing 1834 ASV(s)
Merging at ASV level
Finding inconsistencies in taxonomy, trying to keep the ones that have better taxonomy assignment
ileum_asv_tab <- ileum_data[[1]]
ileum_taxa_tab <- ileum_data[[2]]
ileum_metadata <- ileum_data[[3]]

Colon

colon_data <- merging_data(asv_tab_1=asv_tab_ikem,
                           asv_tab_2=asv_tab_norway,
                           taxa_tab_1=taxa_tab_ikem,
                           taxa_tab_2=taxa_tab_norway,
                           metadata_1=metadata_ikem,
                           metadata_2=metadata_norway,
                           segment="colon",Q="clinical")
Removing 739 ASV(s)
Removing 266 ASV(s)
Merging at ASV level
Finding inconsistencies in taxonomy, trying to keep the ones that have better taxonomy assignment
colon_asv_tab <- colon_data[[1]]
colon_taxa_tab <- colon_data[[2]]
colon_metadata <- colon_data[[3]]

3 Data Analysis - Terminal ileum

segment="terminal_ileum"

3.1 Filtering

Library size

read_counts(ileum_asv_tab, line = c(5000,10000))
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.

3.1.1 Sequencing depth

data_filt <- seq_depth_filtering(ileum_asv_tab,
                                 ileum_taxa_tab,
                                 ileum_metadata,
                                 seq_depth_threshold = 10000)
Removing 131 ASV(s)
filt_ileum_asv_tab <- data_filt[[1]]; alpha_ileum_asv_tab <- filt_ileum_asv_tab
filt_ileum_taxa_tab <- data_filt[[2]]; alpha_ileum_taxa_tab <- filt_ileum_taxa_tab
filt_ileum_metadata <- data_filt[[3]]; alpha_ileum_metadata <- filt_ileum_metadata

seq_step <- dim(filt_ileum_asv_tab)[1]

Library size

read_counts(filt_ileum_asv_tab,line = c(5000,10000))
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.

3.1.2 NearZeroVar

data_filt <- nearzerovar_filtering(filt_ileum_asv_tab, 
                                   filt_ileum_taxa_tab,
                                   filt_ileum_metadata)

filt_ileum_asv_tab <- data_filt[[1]]
filt_ileum_taxa_tab <- data_filt[[2]]
nearzero_step <- dim(filt_ileum_asv_tab)[1]

Library size

read_counts(filt_ileum_asv_tab,line = c(5000,10000))
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.

3.1.3 Final Counts

final_counts_filtering(ileum_asv_tab,
                       filt_ileum_asv_tab,
                       filt_ileum_metadata,
                       seq_step, 0, nearzero_step) %>% `colnames<-`("Count")
                            Count
Raw data: ASVs               5132
Raw data: Samples             294
Sequencing depth filt: ASVs  5001
Prevalence filt: ASVs           0
NearZeroVar filt: ASVs        603
Filt data: ASVs               603
Filt data: Samples            281
Filt data: Patients           281
Filt data: Patients.1           0
Filtered samples               13
Matrices                       TI
healthy                        73
non-rPSC                      105
rPSC                           33
pre_ltx                        70
post_ltx                        0
ETOH                            0

3.2 Important taxa

3.2.0.1 ASV level

psc_asv <- read.xlsx("../results/Q1/univariate_analysis/supplements_psc_effect_terminal_ileum.xlsx",
                     sheet = "terminal_ileum ASV")
psc_increased_asv <- psc_asv$SeqID[psc_asv$log2FoldChange>0]
psc_decreased_asv <- psc_asv$SeqID[psc_asv$log2FoldChange<0]

3.2.0.2 Genus level

psc_genus <- read.xlsx("../results/Q1/univariate_analysis/supplements_psc_effect_terminal_ileum.xlsx",
                     sheet = "terminal_ileum genus")
psc_increased_genus <- psc_genus$SeqID[psc_genus$log2FoldChange>0]
psc_decreased_genus <- psc_genus$SeqID[psc_genus$log2FoldChange<0]

3.3 Relative abundances

3.3.1 Unfiltered data

3.3.1.1 ASV level

Calculation

dys_unfiltered_asv <- dysbiosis_index_calculation(
  ileum_asv_tab,
  ileum_metadata,
  psc_increased_asv,
  psc_decreased_asv,                                  
  "dys_unfiltered_asv")

3.3.1.2 Genus level

Aggregation

genus_data <- aggregate_taxa(ileum_asv_tab,ileum_taxa_tab,
                             taxonomic_level="Genus",names=TRUE)


ileum_genus_tab <- genus_data[[1]]
ileum_genus_taxa <- genus_data[[2]]

Calculation

dys_unfiltered_genus <- dysbiosis_index_calculation(
  ileum_genus_tab,
  ileum_metadata,
  psc_increased_genus,
  psc_decreased_genus,
  "dys_unfiltered_genus")

3.3.2 Filtered data

3.3.2.1 ASV level

Calculation

dys_filtered_asv <- dysbiosis_index_calculation(
  filt_ileum_asv_tab,
  filt_ileum_metadata,
  psc_increased_asv,
  psc_decreased_asv,
  "dys_filtered_asv")

3.3.2.2 Genus level

Aggregation

genus_data <- aggregate_taxa(ileum_asv_tab,ileum_taxa_tab,
                             taxonomic_level="Genus",names=TRUE)

filt_data <- filtering_steps(genus_data[[1]],genus_data[[2]],ileum_metadata,
                            seq_depth_threshold=10000)
Removing 5 ASV(s)
filt_ileum_genus_tab <- filt_data[[1]]
filt_ileum_genus_taxa <- filt_data[[2]]
filt_ileum_metadata <- filt_data[[3]]

Calculation

dys_filtered_genus <- dysbiosis_index_calculation(
  ileum_genus_tab,
  ileum_metadata,
  psc_increased_genus,
  psc_decreased_genus,
  "dys_filtered_genus")

3.3.3 Scatterplot

dysbiosis_data <- dys_unfiltered_asv %>% full_join(dys_unfiltered_genus, by=c("SampleID","PatientID"))
dysbiosis_data <- dysbiosis_data %>% full_join(dys_filtered_asv, by=c("SampleID","PatientID"))
dysbiosis_data <- dysbiosis_data %>% full_join(dys_filtered_genus, by=c("SampleID","PatientID"))
dysbiosis_without_na <- drop_na(dysbiosis_data)

dysbiosis_corr <- cor.table(dysbiosis_without_na[,3:6], cor.method="spearman")[[1]]
dysbiosis_corr
                     dys_unfiltered_asv dys_unfiltered_genus dys_filtered_asv
dys_unfiltered_asv            1.0000000            0.5086742        0.9998367
dys_unfiltered_genus          0.5086742            1.0000000        0.5081042
dys_filtered_asv              0.9998367            0.5081042        1.0000000
dys_filtered_genus            0.5086742            1.0000000        0.5081042
                     dys_filtered_genus
dys_unfiltered_asv            0.5086742
dys_unfiltered_genus          1.0000000
dys_filtered_asv              0.5081042
dys_filtered_genus            1.0000000
pairs(dysbiosis_without_na[,3:6])

ileum_metadata_final <- ileum_metadata %>% full_join(dysbiosis_data, by=c("SampleID"))
ileum_metadata_final_melted <- melt(ileum_metadata_final)
Using SampleID, Patient, Group, Matrix, Country, PatientID as id variables
p <- ggplot(ileum_metadata_final_melted) + 
  geom_boxplot(aes(x=Group, y=value),outliers = FALSE) + 
    geom_jitter(width = 0.2,height = 0,aes(x=Group, y=value, color=Group),size=2) +
  facet_wrap(~variable, ncol = 2,scales = "free") + 
  theme_bw() + 
  scale_fill_manual(values=c("#309f87","#F08080","#f9c675","#A00000")) + 
scale_color_manual(values=c("#309f87","#F08080","#f9c675","#A00000"))

p    

3.4 CLR-transformed

3.4.1 Unfiltered data

3.4.1.1 ASV level

Calculation

dys_unfiltered_asv <- dysbiosis_index_calculation_clr(ileum_asv_tab,
                                                    ileum_metadata,
                                                    psc_increased_asv,
                                                    psc_decreased_asv,
                                                    "dys_unfiltered_asv")

3.4.1.2 Genus level

Aggregation

genus_data <- aggregate_taxa(ileum_asv_tab,ileum_taxa_tab,
                             taxonomic_level="Genus",names=TRUE)


ileum_genus_tab <- genus_data[[1]]
ileum_genus_taxa <- genus_data[[2]]

Calculation

dys_unfiltered_genus <- dysbiosis_index_calculation_clr(ileum_genus_tab,
                                                    ileum_metadata,
                                                    psc_increased_genus,
                                                    psc_decreased_genus,
                                                    "dys_unfiltered_genus")

3.4.2 Filtered data

3.4.2.1 ASV level

Calculation

dys_filtered_asv <- dysbiosis_index_calculation_clr(filt_ileum_asv_tab,
                                                filt_ileum_metadata,
                                                    psc_increased_asv,
                                                    psc_decreased_asv,
                                                    "dys_filtered_asv")

3.4.2.2 Genus level

Aggregation

genus_data <- aggregate_taxa(ileum_asv_tab,ileum_taxa_tab,
                             taxonomic_level="Genus",names=TRUE)

filt_data <- filtering_steps(genus_data[[1]],genus_data[[2]],ileum_metadata,
                            seq_depth_threshold=10000)
Removing 5 ASV(s)
filt_ileum_genus_tab <- filt_data[[1]]
filt_ileum_genus_taxa <- filt_data[[2]]
filt_ileum_metadata <- filt_data[[3]]

Calculation

dys_filtered_genus <- dysbiosis_index_calculation_clr(filt_ileum_genus_tab,
                                                    filt_ileum_metadata,
                                                    psc_increased_genus,
                                                    psc_decreased_genus,
                                                    "dys_filtered_genus")

3.4.3 Scatterplot

dysbiosis_data <- dys_unfiltered_asv %>% full_join(dys_unfiltered_genus, by=c("SampleID","PatientID"))
dysbiosis_data <- dysbiosis_data %>% full_join(dys_filtered_asv, by=c("SampleID","PatientID"))
dysbiosis_data <- dysbiosis_data %>% full_join(dys_filtered_genus, by=c("SampleID","PatientID"))
dysbiosis_without_na <- drop_na(dysbiosis_data)

dysbiosis_corr <- cor.table(dysbiosis_without_na[,3:6], cor.method="spearman")[[1]]
dysbiosis_corr
                     dys_unfiltered_asv dys_unfiltered_genus dys_filtered_asv
dys_unfiltered_asv            1.0000000            0.8067886        0.9963385
dys_unfiltered_genus          0.8067886            1.0000000        0.7789149
dys_filtered_asv              0.9963385            0.7789149        1.0000000
dys_filtered_genus            0.8088973            0.9938647        0.7871783
                     dys_filtered_genus
dys_unfiltered_asv            0.8088973
dys_unfiltered_genus          0.9938647
dys_filtered_asv              0.7871783
dys_filtered_genus            1.0000000
pairs(dysbiosis_without_na[,3:6])

ileum_metadata_final <- ileum_metadata %>% full_join(dysbiosis_data, by=c("SampleID"))
ileum_metadata_final_melted <- melt(ileum_metadata_final)
Using SampleID, Patient, Group, Matrix, Country, PatientID as id variables
p <- ggplot(ileum_metadata_final_melted) + 
  geom_boxplot(aes(x=Group, y=value),outliers = FALSE) + 
    geom_jitter(width = 0.2,height = 0,aes(x=Group, y=value, color=Group),size=2) +
  facet_wrap(~variable, ncol = 2,scales = "free") + 
  theme_bw() + 
  scale_fill_manual(values=c("#309f87","#F08080","#f9c675","#A00000")) + 
scale_color_manual(values=c("#309f87","#F08080","#f9c675","#A00000"))
p    

4 Data Analysis - Colon

segment="colon"

4.1 Filtering

Rules: - sequencing depth > 10000

  • nearZeroVar() with default settings

Library size

read_counts(colon_asv_tab, line = c(5000,10000))
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.

4.1.1 Sequencing depth

data_filt <- seq_depth_filtering(colon_asv_tab,
                                 colon_taxa_tab,
                                 colon_metadata,
                                 seq_depth_threshold = 10000)
Removing 75 ASV(s)
filt_colon_asv_tab <- data_filt[[1]]; alpha_colon_asv_tab <- filt_colon_asv_tab
filt_colon_taxa_tab <- data_filt[[2]]; alpha_colon_taxa_tab <- filt_colon_taxa_tab
filt_colon_metadata <- data_filt[[3]]; alpha_colon_metadata <- filt_colon_metadata

seq_step <- dim(filt_colon_asv_tab)[1]

Library size

read_counts(filt_colon_asv_tab,line = c(10000))
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.

4.1.2 NearZeroVar

data_filt <- nearzerovar_filtering(filt_colon_asv_tab,
                                   filt_colon_taxa_tab,
                                   filt_colon_metadata)

filt_colon_asv_tab <- data_filt[[1]]
filt_colon_taxa_tab <- data_filt[[2]]
nearzero_step <- dim(filt_colon_asv_tab)[1]

Library size

read_counts(filt_colon_asv_tab,line = c(5000,10000))
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.

Check zero depth

data_filt <- check_zero_depth(filt_colon_asv_tab, 
                              filt_colon_taxa_tab, 
                              filt_colon_metadata)

filt_colon_asv_tab <- data_filt[[1]]; 
filt_colon_taxa_tab <- data_filt[[2]]; 
filt_colon_metadata <- data_filt[[3]]; 

Library size

read_counts(filt_colon_asv_tab,line = c(5000,10000))
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.

4.1.3 Final Counts

final_counts_filtering(colon_asv_tab,
                       filt_colon_asv_tab,
                       filt_colon_metadata,
                       seq_step, 0, nearzero_step)
                                               V1
Raw data: ASVs                               6936
Raw data: Samples                             789
Sequencing depth filt: ASVs                  6861
Prevalence filt: ASVs                           0
NearZeroVar filt: ASVs                        449
Filt data: ASVs                               449
Filt data: Samples                            761
Filt data: Patients                           355
Filt data: Patients.1                           0
Filtered samples                               28
Matrices                    Cecum;Rectum;CD;CA;SI
healthy                                       161
non-rPSC                                      263
rPSC                                           87
pre_ltx                                       250
post_ltx                                        0
ETOH                                            0

4.2 Important taxa

4.2.0.1 ASV level

psc_asv <- read.xlsx("../results/Q1/univariate_analysis/supplements_psc_effect_colon.xlsx",
                     sheet = "colon ASV")
psc_increased_asv <- psc_asv$SeqID[psc_asv$log2FoldChange>0]
psc_decreased_asv <- psc_asv$SeqID[psc_asv$log2FoldChange<0]

4.2.0.2 Genus level

psc_genus <- read.xlsx("../results/Q1/univariate_analysis/supplements_psc_effect_colon.xlsx",
                     sheet = "colon genus")
psc_increased_genus <- psc_genus$SeqID[psc_genus$log2FoldChange>0]
psc_decreased_genus <- psc_genus$SeqID[psc_genus$log2FoldChange<0]

4.3 Relative abundances

4.3.1 Unfiltered data

4.3.1.1 ASV level

Calculation

dys_unfiltered_asv <- dysbiosis_index_calculation(colon_asv_tab,
                                                    colon_metadata,
                                                    psc_increased_asv,
                                                    psc_decreased_asv,
                                                    "dys_unfiltered_asv")

4.3.1.2 Genus level

Aggregation

genus_data <- aggregate_taxa(colon_asv_tab,colon_taxa_tab,
                             taxonomic_level="Genus",names=TRUE)


colon_genus_tab <- genus_data[[1]]
colon_genus_taxa <- genus_data[[2]]

Calculation

dys_unfiltered_genus <- dysbiosis_index_calculation(colon_genus_tab,
                                                    colon_metadata,
                                                    psc_increased_genus,
                                                    psc_decreased_genus,
                                                    "dys_unfiltered_genus")

4.3.2 Filtered data

4.3.2.1 ASV level

Calculation

dys_filtered_asv <- dysbiosis_index_calculation(filt_colon_asv_tab,
                                                filt_colon_metadata,
                                                    psc_increased_asv,
                                                    psc_decreased_asv,
                                                    "dys_filtered_asv")

4.3.2.2 Genus level

Aggregation

genus_data <- aggregate_taxa(colon_asv_tab,colon_taxa_tab,
                             taxonomic_level="Genus",names=TRUE)

filt_data <- filtering_steps(genus_data[[1]],genus_data[[2]],colon_metadata,
                            seq_depth_threshold=10000)
Removing 10 ASV(s)
filt_colon_genus_tab <- filt_data[[1]]
filt_colon_genus_taxa <- filt_data[[2]]
filt_colon_metadata <- filt_data[[3]]

Calculation

dys_filtered_genus <- dysbiosis_index_calculation(colon_genus_tab,
                                                    colon_metadata,
                                                    psc_increased_genus,
                                                    psc_decreased_genus,
                                                    "dys_filtered_genus")

4.3.3 Scatterplot

dysbiosis_data <- dys_unfiltered_asv %>% full_join(dys_unfiltered_genus, by=c("SampleID","PatientID"))
dysbiosis_data <- dysbiosis_data %>% full_join(dys_filtered_asv, by=c("SampleID","PatientID"))
dysbiosis_data <- dysbiosis_data %>% full_join(dys_filtered_genus, by=c("SampleID","PatientID"))
dysbiosis_without_na <- drop_na(dysbiosis_data)

dysbiosis_corr <- cor.table(dysbiosis_without_na[,3:6], cor.method="spearman")[[1]]
dysbiosis_corr
                     dys_unfiltered_asv dys_unfiltered_genus dys_filtered_asv
dys_unfiltered_asv            1.0000000            0.6341152        0.9999958
dys_unfiltered_genus          0.6341152            1.0000000        0.6340594
dys_filtered_asv              0.9999958            0.6340594        1.0000000
dys_filtered_genus            0.6341152            1.0000000        0.6340594
                     dys_filtered_genus
dys_unfiltered_asv            0.6341152
dys_unfiltered_genus          1.0000000
dys_filtered_asv              0.6340594
dys_filtered_genus            1.0000000
pairs(dysbiosis_without_na[,3:6])

colon_metadata_final <- colon_metadata %>% full_join(dysbiosis_data, by=c("SampleID"))
colon_metadata_final_melted <- melt(colon_metadata_final)
Using SampleID, Patient, Group, Matrix, Country, PatientID as id variables
p <- ggplot(colon_metadata_final_melted) + 
  geom_boxplot(aes(x=Group, y=value),outliers = FALSE) + 
    geom_jitter(width = 0.2,height = 0,aes(x=Group, y=value, color=Group),size=2) +
  facet_wrap(~variable, ncol = 2,scales = "free") + 
  theme_bw() + 
  scale_fill_manual(values=c("#309f87","#F08080","#f9c675","#A00000")) + 
scale_color_manual(values=c("#309f87","#F08080","#f9c675","#A00000"))
p    

4.4 CLR-transformed

4.4.1 Unfiltered data

4.4.1.1 ASV level

dys_unfiltered_asv

Calculation

dys_unfiltered_asv <- dysbiosis_index_calculation_clr(colon_asv_tab,
                                                    colon_metadata,
                                                    psc_increased_asv,
                                                    psc_decreased_asv,
                                                    "dys_unfiltered_asv")

4.4.1.2 Genus level

dys_unfiltered_genus

Aggregation

genus_data <- aggregate_taxa(colon_asv_tab,colon_taxa_tab,
                             taxonomic_level="Genus",names=TRUE)


colon_genus_tab <- genus_data[[1]]
colon_genus_taxa <- genus_data[[2]]

Calculation

dys_unfiltered_genus <- dysbiosis_index_calculation_clr(colon_genus_tab,
                                                    colon_metadata,
                                                    psc_increased_genus,
                                                    psc_decreased_genus,
                                                    "dys_unfiltered_genus")

4.4.2 Filtered data

4.4.2.1 ASV level

dys_filtered_asv

Calculation

dys_filtered_asv <- dysbiosis_index_calculation_clr(filt_colon_asv_tab,
                                                filt_colon_metadata,
                                                    psc_increased_asv,
                                                    psc_decreased_asv,
                                                    "dys_filtered_asv")

4.4.2.2 Genus level

dys_filtered_genus

Aggregation

genus_data <- aggregate_taxa(colon_asv_tab,colon_taxa_tab,
                             taxonomic_level="Genus",names=TRUE)

filt_data <- filtering_steps(genus_data[[1]],genus_data[[2]],colon_metadata,
                            seq_depth_threshold=10000)
Removing 10 ASV(s)
filt_colon_genus_tab <- filt_data[[1]]
filt_colon_genus_taxa <- filt_data[[2]]
filt_colon_metadata <- filt_data[[3]]

Calculation

dys_filtered_genus <- dysbiosis_index_calculation_clr(filt_colon_genus_tab,
                                                    filt_colon_metadata,
                                                    psc_increased_genus,
                                                    psc_decreased_genus,
                                                    "dys_filtered_genus")

4.4.3 Scatterplot

dysbiosis_data <- dys_unfiltered_asv %>% full_join(dys_unfiltered_genus, by=c("SampleID","PatientID"))
dysbiosis_data <- dysbiosis_data %>% full_join(dys_filtered_asv, by=c("SampleID","PatientID"))
dysbiosis_data <- dysbiosis_data %>% full_join(dys_filtered_genus, by=c("SampleID","PatientID"))
dysbiosis_without_na <- drop_na(dysbiosis_data)

dysbiosis_corr <- cor.table(dysbiosis_without_na[,3:6], cor.method="spearman")[[1]]
dysbiosis_corr
                     dys_unfiltered_asv dys_unfiltered_genus dys_filtered_asv
dys_unfiltered_asv            1.0000000            0.8469789        0.9955350
dys_unfiltered_genus          0.8469789            1.0000000        0.8231594
dys_filtered_asv              0.9955350            0.8231594        1.0000000
dys_filtered_genus            0.8428275            0.9961173        0.8249441
                     dys_filtered_genus
dys_unfiltered_asv            0.8428275
dys_unfiltered_genus          0.9961173
dys_filtered_asv              0.8249441
dys_filtered_genus            1.0000000
pairs(dysbiosis_without_na[,3:6])

colon_metadata_final <- colon_metadata %>% full_join(dysbiosis_data, by=c("SampleID"))
colon_metadata_final_melted <- melt(colon_metadata_final)
Using SampleID, Patient, Group, Matrix, Country, PatientID as id variables
p <- ggplot(colon_metadata_final_melted) + 
  geom_boxplot(aes(x=Group, y=value),outliers = FALSE) + 
    geom_jitter(width = 0.2,height = 0,aes(x=Group, y=value, color=Group),size=2) +
  facet_wrap(~variable, ncol = 2,scales = "free") + 
  theme_bw() + 
  scale_fill_manual(values=c("#309f87","#F08080","#f9c675","#A00000")) + 
scale_color_manual(values=c("#309f87","#F08080","#f9c675","#A00000"))
p    

metadata_dysbiosis_final <- rbind(colon_metadata_final,ileum_metadata_final)
write.csv(metadata_dysbiosis_final,
          "../../data/clinical_data/dysbiosis_metadata.csv",row.names = FALSE)