ORA based on
KEGGsets_HMDB
We start by running ORA on the KEGG metabolite sets annotated with HMDB
identifiers.
First, perform a Fisher test for every set and select the significantly
enriched ones.
library(localEnrichment)
# ORA for cachexia against the KEGG metabolite sets (HMDB identifiers).
# Fisher test with BH adjustment, restricted to sets whose size lies between
# min_set_size and max_set_size.
ORA_cachexia_KEGG <- ora_test(
  eset         = KEGGset_HMDB,
  selected     = cachexia_sig,
  background   = cachexia_background,
  test         = "fisher",
  p_adjust     = "BH",
  min_set_size = 3,
  max_set_size = 500
)
# Inspect the columns and types of the result.
dplyr::glimpse(ORA_cachexia_KEGG)
## Rows: 99
## Columns: 8
## $ set_id <chr> "hsa05230", "hsa00970", "hsa04974", "hsa04978", "hsa…
## $ set_name <chr> "Central carbon metabolism in cancer - Homo sapiens …
## $ n_selected_in_set <int> 17, 13, 14, 10, 14, 8, 10, 6, 9, 8, 6, 5, 5, 5, 5, 3…
## $ n_in_set <int> 35, 23, 47, 27, 82, 26, 47, 15, 45, 40, 22, 17, 18, …
## $ p_value <dbl> 2.039174e-18, 2.441065e-15, 1.026504e-11, 1.122660e-…
## $ overlap_features <chr> "HMDB0000883;HMDB0000929;HMDB0000687;HMDB0000168;HMD…
## $ p_adj <dbl> 2.018783e-16, 1.208327e-13, 3.387462e-10, 2.778584e-…
## $ fold_enrichment <dbl> 16.769087, 19.513899, 10.283920, 12.786885, 5.894442…
# Preview the first rows of the full ORA result table.
head(ORA_cachexia_KEGG)
## # A tibble: 6 × 8
## set_id set_name n_selected_in_set n_in_set p_value overlap_features p_adj
## <chr> <chr> <int> <int> <dbl> <chr> <dbl>
## 1 hsa052… Central… 17 35 2.04e-18 HMDB0000883;HMD… 2.02e-16
## 2 hsa009… Aminoac… 13 23 2.44e-15 HMDB0000883;HMD… 1.21e-13
## 3 hsa049… Protein… 14 47 1.03e-11 HMDB0000883;HMD… 3.39e-10
## 4 hsa049… Mineral… 10 27 1.12e- 9 HMDB0000883;HMD… 2.78e- 8
## 5 hsa020… ABC tra… 14 82 3.11e- 8 HMDB0000883;HMD… 6.15e- 7
## 6 hsa002… Alanine… 8 26 3.23e- 7 HMDB0000168;HMD… 5.32e- 6
## # ℹ 1 more variable: fold_enrichment <dbl>
# Keep the sets whose adjusted p-value is below 0.05, smallest first, and
# report how many remain.
ORA_cachexia_KEGG_sig <- dplyr::arrange(
  dplyr::filter(ORA_cachexia_KEGG, p_adj < 0.05),
  p_adj
)
nrow(ORA_cachexia_KEGG_sig)
## [1] 26
# Show the ten KEGG sets with the smallest adjusted p-values.
head(ORA_cachexia_KEGG_sig, 10)
## # A tibble: 10 × 8
## set_id set_name n_selected_in_set n_in_set p_value overlap_features p_adj
## <chr> <chr> <int> <int> <dbl> <chr> <dbl>
## 1 hsa05… Central… 17 35 2.04e-18 HMDB0000883;HMD… 2.02e-16
## 2 hsa00… Aminoac… 13 23 2.44e-15 HMDB0000883;HMD… 1.21e-13
## 3 hsa04… Protein… 14 47 1.03e-11 HMDB0000883;HMD… 3.39e-10
## 4 hsa04… Mineral… 10 27 1.12e- 9 HMDB0000883;HMD… 2.78e- 8
## 5 hsa02… ABC tra… 14 82 3.11e- 8 HMDB0000883;HMD… 6.15e- 7
## 6 hsa00… Alanine… 8 26 3.23e- 7 HMDB0000168;HMD… 5.32e- 6
## 7 hsa00… Glyoxyl… 10 47 4.39e- 7 HMDB0000641;HMD… 6.21e- 6
## 8 hsa00… Citrate… 6 15 1.88e- 6 HMDB0000094;HMD… 2.33e- 5
## 9 hsa00… D-Amino… 9 45 3.08e- 6 HMDB0000167;HMD… 3.39e- 5
## 10 hsa00… Glycine… 8 40 1.15e- 5 HMDB0000929;HMD… 1.14e- 4
## # ℹ 1 more variable: fold_enrichment <dbl>
# Print the full structure of the filtered result, including its attributes.
str(ORA_cachexia_KEGG_sig)
## EnrchmnR [26 × 8] (S3: EnrichmentResult/tbl_df/tbl/data.frame)
## $ set_id : chr [1:26] "hsa05230" "hsa00970" "hsa04974" "hsa04978" ...
## $ set_name : chr [1:26] "Central carbon metabolism in cancer - Homo sapiens (human)" "Aminoacyl-tRNA biosynthesis - Homo sapiens (human)" "Protein digestion and absorption - Homo sapiens (human)" "Mineral absorption - Homo sapiens (human)" ...
## $ n_selected_in_set: int [1:26] 17 13 14 10 14 8 10 6 9 8 ...
## $ n_in_set : int [1:26] 35 23 47 27 82 26 47 15 45 40 ...
## $ p_value : num [1:26] 2.04e-18 2.44e-15 1.03e-11 1.12e-09 3.11e-08 ...
## $ overlap_features : chr [1:26] "HMDB0000883;HMDB0000929;HMDB0000687;HMDB0000168;HMDB0000161;HMDB0000641;HMDB0000187;HMDB0000094;HMDB0000158;HMD"| __truncated__ "HMDB0000883;HMDB0000929;HMDB0000687;HMDB0000168;HMDB0000167;HMDB0000161;HMDB0000641;HMDB0000187;HMDB0000158;HMD"| __truncated__ "HMDB0000883;HMDB0000929;HMDB0000687;HMDB0000168;HMDB0000167;HMDB0000161;HMDB0000641;HMDB0000187;HMDB0000158;HMD"| __truncated__ "HMDB0000883;HMDB0000929;HMDB0000687;HMDB0000168;HMDB0000167;HMDB0000161;HMDB0000641;HMDB0000187;HMDB0000172;HMDB0000123" ...
## $ p_adj : num [1:26] 2.02e-16 1.21e-13 3.39e-10 2.78e-08 6.15e-07 ...
## $ fold_enrichment : num [1:26] 16.77 19.51 10.28 12.79 5.89 ...
## - attr(*, "metadata")=List of 6
## ..$ mapping_name : chr "KEGG_pathway_HMDB"
## ..$ feature_id_type: chr "HMDB"
## ..$ feature_species: chr "Homo sapiens"
## ..$ set_source : chr "KEGG"
## ..$ version : chr "2025-11-29"
## ..$ description : chr "KEGG pathway to HMDB metabolites via metaboliteIDmapping"
The results can be visualized in several ways.
First, clean up the pathway names.
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.3
# Remove the trailing " - Homo sapiens (human)" tag from pathway names and
# trim leading/trailing whitespace from what is left.
#
# x: character vector of pathway names.
# Returns a character vector of the same length as x.
clean_pathway_name <- function(x) {
  human_suffix <- " - Homo sapiens \\(human\\)$"
  without_suffix <- gsub(human_suffix, "", x, perl = TRUE)
  trimws(without_suffix)
}
# Add a display name without the organism suffix; set_name itself is kept.
ORA_cachexia_KEGG_sig <- ORA_cachexia_KEGG_sig %>%
  dplyr::mutate(set_name_clean = clean_pathway_name(set_name))

# Plotting table ordered by adjusted p-value. The cut to 20 rows is currently
# disabled; append `%>% dplyr::slice(1:20)` to restore it.
# dplyr verbs are namespaced consistently so they never depend on which
# packages happen to be attached.
top20 <- ORA_cachexia_KEGG_sig %>%
  dplyr::arrange(p_adj)

# IMPORTANT: factor levels must be unique. They are reversed so that, in the
# flipped charts that follow, the first row (smallest p_adj) is drawn on top.
top20$set_name_clean <- factor(
  top20$set_name_clean,
  levels = rev(unique(top20$set_name_clean))
)
# Horizontal bar chart of -log10(adjusted p-value) per KEGG pathway.
# top20 holds every pathway with p_adj < 0.05 (the slice to 20 rows is
# commented out where top20 is built), so the title states the filter rather
# than a "Top 20" that is not applied.
ggplot(top20, aes(x = set_name_clean, y = -log10(p_adj))) +
  geom_col(fill = "#3B82F6") +
  coord_flip() +
  labs(
    title = "Cachexia – KEGG Enrichment (FDR < 0.05)",
    x = "Pathway",
    y = "-log10(FDR)"
  ) +
  theme_minimal(base_size = 12)

# Lollipop chart of the same data: a grey segment from 0 to -log10(p_adj)
# with a point at its end, one row per pathway.
# top20 holds every pathway with p_adj < 0.05 (the slice to 20 rows is
# commented out where top20 is built), so the title states the filter rather
# than a "Top 20" that is not applied.
ggplot(top20, aes(x = -log10(p_adj), y = set_name_clean)) +
  geom_segment(
    aes(
      x = 0, xend = -log10(p_adj),
      y = set_name_clean, yend = set_name_clean
    ),
    color = "#999999"
  ) +
  geom_point(size = 3, color = "#E11D48") +
  labs(
    title = "Cachexia – KEGG Enrichment (FDR < 0.05)",
    x = "-log10(FDR)",
    y = "Pathway"
  ) +
  theme_minimal(base_size = 12)

# Table for the bubble plot, ordered by adjusted p-value. The cut to 30 rows
# is currently disabled; append `%>% dplyr::slice(1:30)` to restore it.
top30 <- ORA_cachexia_KEGG_sig %>%
  dplyr::arrange(p_adj)

# Bubble plot: fold enrichment against -log10(adjusted p-value); point size is
# the number of selected features in the set, colour repeats the y value.
# The colour legend is labelled explicitly so it reads "-log10(FDR)" like the
# y axis instead of showing the raw mapping expression.
ggplot(
  top30,
  aes(
    x = fold_enrichment,
    y = -log10(p_adj),
    size = n_selected_in_set,
    color = -log10(p_adj)
  )
) +
  geom_point(alpha = 0.8) +
  scale_color_gradient(low = "#8B5CF6", high = "#1E40AF") +
  labs(
    title = "Cachexia – KEGG Enrichment (Bubble plot)",
    x = "Fold Enrichment",
    y = "-log10(FDR)",
    size = "# hits",
    color = "-log10(FDR)"
  ) +
  theme_minimal(base_size = 12)

ORA based on
SMPDBsets
We can proceed similarly with the SMPDB sets.
# ORA for cachexia against the SMPDB sets, with the same test, adjustment and
# set-size limits as the KEGG run.
ORA_cachexia_SMPDB <- ora_test(
  eset         = SMPDBset,
  selected     = cachexia_sig,
  background   = cachexia_background,
  test         = "fisher",
  p_adjust     = "BH",
  min_set_size = 3,
  max_set_size = 500
)
# Significant results: adjusted p-value below 0.05, smallest first.
# The dplyr verbs are namespaced, as in the KEGG section, so that a bare
# `filter` can never resolve to stats::filter.
ORA_cachexia_SMPDB_sig <- ORA_cachexia_SMPDB %>%
  dplyr::filter(p_adj < 0.05) %>%
  dplyr::arrange(p_adj)
nrow(ORA_cachexia_SMPDB_sig)
## [1] 87
# Show the ten SMPDB sets with the smallest adjusted p-values.
head(ORA_cachexia_SMPDB_sig, 10)
## # A tibble: 10 × 8
## set_id set_name n_selected_in_set n_in_set p_value overlap_features p_adj
## <chr> <chr> <int> <int> <dbl> <chr> <dbl>
## 1 SMP0000… Nevirap… 10 41 1.09e-7 HMDB0000687;HMD… 3.42e-5
## 2 SMP0000… Azithro… 7 19 4.63e-7 HMDB0000883;HMD… 3.42e-5
## 3 SMP0000… Clarith… 7 19 4.63e-7 HMDB0000883;HMD… 3.42e-5
## 4 SMP0000… Clindam… 7 19 4.63e-7 HMDB0000883;HMD… 3.42e-5
## 5 SMP0000… Erythro… 7 19 4.63e-7 HMDB0000883;HMD… 3.42e-5
## 6 SMP0000… Roxithr… 7 19 4.63e-7 HMDB0000883;HMD… 3.42e-5
## 7 SMP0000… Telithr… 7 19 4.63e-7 HMDB0000883;HMD… 3.42e-5
## 8 SMP0000… Amikaci… 7 19 4.63e-7 HMDB0000883;HMD… 3.42e-5
## 9 SMP0000… Gentami… 7 19 4.63e-7 HMDB0000883;HMD… 3.42e-5
## 10 SMP0000… Kanamyc… 7 19 4.63e-7 HMDB0000883;HMD… 3.42e-5
## # ℹ 1 more variable: fold_enrichment <dbl>
# Plotting table for the SMPDB bar chart. No row limit is applied (the
# `dplyr::slice(1:70)` cut is disabled), so every significant set is drawn.
# Levels are de-duplicated because factor() rejects repeated levels; they are
# reversed so the first row (smallest p_adj) ends up on top after coord_flip().
top_smpdb <- ORA_cachexia_SMPDB_sig %>%
  dplyr::mutate(set_name = factor(set_name, levels = rev(unique(set_name))))

# Horizontal bar chart of -log10(adjusted p-value) per SMPDB pathway.
# The title states the filter instead of a "Top 20" that is not applied.
ggplot(top_smpdb, aes(x = set_name, y = -log10(p_adj))) +
  geom_col(fill = "#10B981") + # green, to tell it apart from the KEGG plots
  coord_flip() +
  labs(
    title = "Cachexia – SMPDB Enrichment (FDR < 0.05)",
    x = "SMPDB pathway",
    y = "-log10(FDR)"
  ) +
  theme_minimal(base_size = 12)

ORA for chemical
classes
# ORA for cachexia against the chemical-class sets. min_set_size is 1 here,
# whereas the KEGG and SMPDB runs use 3.
ORA_cachexia_Chem <- ora_test(
  eset         = ChemicalClassSet,
  selected     = cachexia_sig,
  background   = cachexia_background,
  test         = "fisher",
  p_adjust     = "BH",
  min_set_size = 1,
  max_set_size = 500
)
# Retain classes with adjusted p-value below 0.25, smallest first.
# NOTE(review): this cutoff is looser than the 0.05 used for the KEGG and
# SMPDB results; confirm it is intended.
# The dplyr verbs are namespaced so that a bare `filter` can never resolve to
# stats::filter.
ORA_cachexia_Chem_sig <- ORA_cachexia_Chem %>%
  dplyr::filter(p_adj < 0.25) %>%
  dplyr::arrange(p_adj)
nrow(ORA_cachexia_Chem_sig)
## [1] 12
# Show the ten retained chemical classes with the smallest adjusted p-values.
head(ORA_cachexia_Chem_sig, 10)
## # A tibble: 10 × 8
## set_id set_name n_selected_in_set n_in_set p_value overlap_features p_adj
## <chr> <chr> <int> <int> <dbl> <chr> <dbl>
## 1 "Amin… "Amino … 10 15 5.23e-13 HMDB0000687;HMD… 6.28e-12
## 2 "Orga… "Organi… 8 24 1.59e- 7 HMDB0000232;HMD… 9.54e- 7
## 3 "Acyl… "Acylca… 2 2 8.26e- 4 HMDB0000062;HMD… 3.30e- 3
## 4 "Amin… "Amino … 2 4 4.77e- 3 HMDB0000267;HMD… 9.54e- 3
## 5 "Biog… "Biogen… 2 4 4.77e- 3 HMDB0000562;HMD… 9.54e- 3
## 6 "Orga… "Organi… 2 4 4.77e- 3 HMDB0000128;HMD… 9.54e- 3
## 7 "Amin… "Amino … 2 7 1.58e- 2 HMDB0000479;HMD… 2.71e- 2
## 8 "Carb… "Carbox… 1 1 2.90e- 2 HMDB0000072 4.34e- 2
## 9 "Nucl… "Nucleo… 1 2 5.71e- 2 HMDB0000157 6.85e- 2
## 10 "Orga… "Organo… 1 2 5.71e- 2 HMDB0000149 6.85e- 2
## # ℹ 1 more variable: fold_enrichment <dbl>
# Plotting table: the first ten retained chemical classes.
# Levels are de-duplicated because factor() rejects repeated levels; they are
# reversed so the first row (smallest p_adj) ends up on top after coord_flip().
top_chem <- ORA_cachexia_Chem_sig %>%
  dplyr::slice(1:10) %>%
  dplyr::mutate(set_name = factor(set_name, levels = rev(unique(set_name))))

# Horizontal bar chart of -log10(adjusted p-value) per chemical class.
# The title says "Top 10" to match the ten rows kept above.
ggplot(top_chem, aes(x = set_name, y = -log10(p_adj))) +
  geom_col(fill = "#F59E0B") + # orange, to tell it apart from the other plots
  coord_flip() +
  labs(
    title = "Cachexia – Chemical Class Enrichment (Top 10)",
    x = "Chemical class",
    y = "-log10(FDR)"
  ) +
  theme_minimal(base_size = 12)
