suppressPackageStartupMessages({

library(readxl)
library(readr)

library(limma)
library(stringr)
library(rvest)
library(ggrepel)
library(kableExtra)
library(tidyverse)


library(POMA)
library(metabolomicsWorkbenchR)
library(SummarizedExperiment)

library(BiocFileCache)
library(enrichmet)
})

## Warning: package 'readxl' was built under R version 4.4.3

## Warning: package 'stringr' was built under R version 4.4.3

## Warning: package 'rvest' was built under R version 4.4.3

## Warning: package 'ggplot2' was built under R version 4.4.3

## Warning: package 'tibble' was built under R version 4.4.3

## Warning: package 'purrr' was built under R version 4.4.3

## Warning: package 'forcats' was built under R version 4.4.3

## Warning: package 'matrixStats' was built under R version 4.4.3

## Warning: package 'dbplyr' was built under R version 4.4.3

1 Introduction

This document contains the preprocessing and analysis of two metabolomics datasets that will be used for benchmarking several enrichment analysis packages.

The datasets are:

The Cachexia dataset a well known eMPLE DATASET USED IN mETABOaNALYST
The MST000291 dataset used by the fobitools package as an example and available in MetabolomicsWorkbench

2 The cachexia dataset

This is an example introduced by Xuia et alt. in MetaboAnalyst and is used in most tutorials of this software.

The dataset is available as a .csv file in the metaboData repository in github (https://github.com/nutrimetabolomics/metaboData/blob/main/Datasets/2024-Cachexia/human_cachexia.csv). For implicity it has been downladed from there and re-stored as an Excel File.

human_cachexia <- read_excel("data/human_cachexia.xlsx")
str(human_cachexia)

## tibble [63 × 78] (S3: tbl_df/tbl/data.frame)
##  $ Metabolite      : chr [1:63] "1,6-Anhydro-beta-D-glucose" "1-Methylnicotinamide" "2-Aminobutyrate" "2-Hydroxyisobutyrate" ...
##  $ cachPIF_178     : num [1:63] 40.9 65.4 18.7 26.1 71.5 ...
##  $ cachPIF_087     : num [1:63] 62.2 340.4 24.3 41.7 67.4 ...
##  $ cachPIF_090     : num [1:63] 270.4 64.7 12.2 65.4 23.8 ...
##  $ cachNETL_005_V1 : num [1:63] 154.5 53 172.4 74.4 1199.9 ...
##  $ cachPIF_115     : num [1:63] 22.2 73.7 15.6 83.9 33.1 ...
##  $ cachPIF_110     : num [1:63] 212.7 31.8 18.4 80.6 47.9 ...
##  $ cachNETL_019_V1 : num [1:63] 151.41 36.6 8.67 42.52 223.63 ...
##  $ cachNETCR_014_V1: num [1:63] 31.5 6.82 4.18 12.94 25.03 ...
##  $ cachNETCR_014_V2: num [1:63] 51.42 30.27 7.54 34.81 80.64 ...
##  $ cachPIF_154     : num [1:63] 117.9 52.5 19.5 72.2 73.7 ...
##  $ cachNETL_022_V1 : num [1:63] 20.7 221.4 15.2 28.8 357.8 ...
##  $ cachNETL_022_V2 : num [1:63] 127.7 177.7 12.7 15 68 ...
##  $ cachNETL_008_V1 : num [1:63] 59.74 50.91 6.82 46.06 111.05 ...
##  $ cachPIF_146     : num [1:63] 89.1 32.8 10.4 32.1 32.5 ...
##  $ cachPIF_119     : num [1:63] 23.57 6.89 2.12 7.85 8.33 ...
##  $ cachPIF_099     : num [1:63] 41.26 8.67 2.56 7.85 6.89 ...
##  $ cachPIF_162     : num [1:63] 589.9 22 15.2 46.1 32.8 ...
##  $ cachPIF_160     : num [1:63] 112.2 25.3 15.5 47.9 28.8 ...
##  $ cachPIF_113     : num [1:63] 167.3 19.9 13.5 31.2 47.9 ...
##  $ cachPIF_143     : num [1:63] 183.09 90.92 8.94 64.07 20.49 ...
##  $ cachNETCR_007_V1: num [1:63] 208.51 53.52 5.26 47.94 212.72 ...
##  $ cachNETCR_007_V2: num [1:63] 34.8 95.6 23.6 68 287.1 ...
##  $ cachPIF_137     : num [1:63] 333.62 35.87 7.92 54.6 20.49 ...
##  $ cachPIF_100     : num [1:63] 32.46 9.68 3.9 11.02 170.72 ...
##  $ cachNETL_004_V1 : num [1:63] 4.71 11.13 43.38 30.88 104.58 ...
##  $ cachPIF_094     : num [1:63] 68.7 13.9 12.2 25 28.2 ...
##  $ cachPIF_132     : num [1:63] 214.9 127.7 31.5 33.8 88.2 ...
##  $ cachPIF_163     : num [1:63] 304.9 25.8 27.1 40.5 70.8 ...
##  $ cachNETCR_003_V1: num [1:63] 37.71 10.8 5 8.25 11.7 ...
##  $ cachNETL_028_V1 : num [1:63] 45.6 473.4 16.3 63.4 221.4 ...
##  $ cachNETL_028_V2 : num [1:63] 34.12 92.76 8.25 16.61 55.15 ...
##  $ cachNETCR_013_V1: num [1:63] 107.8 16.6 26.8 32.5 62.8 ...
##  $ cachNETL_020_V1 : num [1:63] 13.33 50.91 2.92 40.85 46.99 ...
##  $ cachNETL_020_V2 : num [1:63] 27.9 80.6 15.8 64.7 88.2 ...
##  $ cachPIF_192     : num [1:63] 141.2 68 40.9 12.8 26.1 ...
##  $ cachNETCR_012_V1: num [1:63] 14 46.1 29.1 24.5 64.1 ...
##  $ cachNETCR_012_V2: num [1:63] 244.7 116.8 40 61.6 174.2 ...
##  $ cachPIF_089     : num [1:63] 124 81.5 55.1 70.8 92.8 ...
##  $ cachNETCR_002_V1: num [1:63] 141.2 28.5 20.3 14.3 97.5 ...
##  $ cachPIF_179     : num [1:63] 35.16 26.58 5.21 30.27 7.39 ...
##  $ cachPIF_114     : num [1:63] 685.4 36.2 32.5 85.6 25 ...
##  $ cachNETCR_006_V1: num [1:63] 278.7 40.5 55.1 51.4 74.4 ...
##  $ cachPIF_141     : num [1:63] 15.8 23.6 18 37.3 21.3 ...
##  $ cachNETCR_025_V1: num [1:63] 29.96 96.54 6.55 65.37 1053.63 ...
##  $ cachNETCR_025_V2: num [1:63] 16.95 114.43 2.53 77.48 2465.13 ...
##  $ cachNETCR_016_V1: num [1:63] 292.9 58 167.3 82.3 468.7 ...
##  $ cachPIF_116     : num [1:63] 29.67 70.11 5.58 18.73 5.53 ...
##  $ ctrlPIF_191     : num [1:63] 18.92 24.53 3.29 10.49 9.68 ...
##  $ ctrlPIF_164     : num [1:63] 127.74 1032.77 8.58 66.02 38.09 ...
##  $ ctrlNETL_013_V1 : num [1:63] 34.81 12.3 5.87 15.18 16.78 ...
##  $ ctrlPIF_188     : num [1:63] 65.37 24.05 4.71 15.8 7.24 ...
##  $ ctrlPIF_195     : num [1:63] 15.18 94.63 11.36 8.17 5.64 ...
##  $ ctrlNETCR_015_V1: num [1:63] 70.8 75.9 22.6 61 230.4 ...
##  $ ctrlPIF_102     : num [1:63] 25.28 101.49 8.33 59.15 88.23 ...
##  $ ctrlNETL_010_V1 : num [1:63] 34.47 12.81 3.78 8.33 14.3 ...
##  $ ctrlNETL_010_V2 : num [1:63] 18.54 8.41 3.78 4.85 8.08 ...
##  $ ctrlNETL_001_V1 : num [1:63] 37.34 55.15 7.39 36.23 75.94 ...
##  $ ctrlNETCR_015_V2: num [1:63] 33.8 53.5 18.2 46.5 81.5 ...
##  $ ctrlNETCR_005_V1: num [1:63] 22.4 55.1 20.7 38.5 164 ...
##  $ ctrlPIF_111     : num [1:63] 146.9 10.1 6.3 27.9 24.1 ...
##  $ ctrlPIF_171     : num [1:63] 64.07 6.42 28.79 18.92 85.63 ...
##  $ ctrlNETCR_008_V1: num [1:63] 32.46 14.01 2.97 5.16 8.08 ...
##  $ ctrlNETCR_008_V2: num [1:63] 113.3 43.38 4.66 27.11 22.42 ...
##  $ ctrlNETL_017_V1 : num [1:63] 22.2 20.7 7.85 19.69 38.47 ...
##  $ ctrlNETL_017_V2 : num [1:63] 46.53 9.78 3.1 9.3 10.59 ...
##  $ ctrlNETL_002_V1 : num [1:63] 192.48 108.85 7.77 46.06 55.15 ...
##  $ ctrlNETL_002_V2 : num [1:63] 528.5 225.9 13.5 93.7 230.4 ...
##  $ ctrlPIF_190     : num [1:63] 28.79 9.21 5.53 17.64 14.44 ...
##  $ ctrlNETCR_009_V1: num [1:63] 181.27 48.42 8.94 51.94 982.4 ...
##  $ ctrlNETCR_009_V2: num [1:63] 47.47 7.69 4.06 9.3 65.37 ...
##  $ ctrlNETL_007_V1 : num [1:63] 15.96 16.12 1.93 15.8 25.28 ...
##  $ ctrlPIF_112     : num [1:63] 22.87 10.38 1.28 5.58 8.5 ...
##  $ ctrlNETCR_019_V2: num [1:63] 35.2 52.5 13.9 44.3 99.5 ...
##  $ ctrlNETL_012_V1 : num [1:63] 16.9 15.8 10.5 22.4 62.8 ...
##  $ ctrlNETL_012_V2 : num [1:63] 9.39 14.01 5.16 23.57 46.99 ...
##  $ ctrlNETL_003_V1 : num [1:63] 37.7 18.2 26.1 15 23.3 ...
##  $ ctrlNETL_003_V2 : num [1:63] 38.5 12.6 15 12.6 22.2 ...

2.1 Selection of differentially quantified metabolites

This dataset contains only metabolites that have been shown to be significantly different among the groups.

This is confirmed by the analysis performed in the following.

# 1. Definir el vector de grups a partir dels noms de les columnes
sample_names <- colnames(human_cachexia)[-1]   # excepte 'Metabolite'

group <- ifelse(str_detect(sample_names, "^ctrl"), "Control", "Cachexia")
group <- factor(group)

# 2. Construir la matriu d'expressió (metabolits x mostres)
X <- as.matrix(human_cachexia[ , -1])
rownames(X) <- human_cachexia$Metabolite

# 3. Disseny experimental
design <- model.matrix(~ 0+group)

# 4. Ajustar model amb limma
fit <- lmFit(X, design)
fit <- eBayes(fit)

# 5. Resultats
res <- topTable(fit, n=Inf)
tail(res)

# 6. Mostrar els primers resultats
print(res)

##                            groupCachexia groupControl     AveExpr          F
## 2-Hydroxyisobutyrate           43.237660    27.871000   37.250649 102.108709
## Creatinine                  10722.140213  5619.174667 8733.971818  88.727152
## Quinolinate                    83.747234    39.324000   66.439481  85.721150
## Valine                         45.582553    20.132667   35.667013  72.840052
## Dimethylamine                 453.580638   208.683333  358.166104  68.123894
## Tryptophan                     81.824043    41.833000   66.243117  65.057316
## Methylamine                    21.216383    11.360000   17.376234  63.658759
## Pyroglutamate                 270.292340   119.258000  211.447792  62.106598
## Leucine                        31.261702    13.556667   24.363636  62.078871
## Asparagine                     75.390851    41.749000   62.283636  59.785143
## N,N-Dimethylglycine            34.489787    13.596667   26.349610  58.618084
## tau-Methylhistidine           105.667660    64.650333   89.686883  58.028331
## Threonine                     118.233191    59.518667   95.357403  56.120211
## Betaine                       112.252979    55.970333   90.324675  56.039351
## Alanine                       347.591064   157.584000  273.562338  55.825450
## 3-Indoxylsulfate              265.157660   146.376333  218.879221  55.619789
## Fucose                        108.599149    57.444667   88.668831  55.197137
## Glutamine                     391.410426   174.427333  306.871558  55.150421
## Serine                        245.829787   122.263000  197.686883  53.127289
## Ethanolamine                  326.772128   197.125333  276.260390  51.936097
## Citrate                      2720.852766  1474.718667 2235.345974  47.687718
## Glycolate                     219.269574   138.983667  187.989351  45.626282
## trans-Aconitate                48.814043    27.809333   40.630390  45.389616
## Hypoxanthine                   67.087021    51.714333   61.097662  44.369447
## Guanidoacetate                 97.624255    68.739667   86.370519  43.688379
## Tyrosine                      100.742340    52.014000   81.757273  43.591865
## Isoleucine                      9.660851     7.218000    8.709091  40.944145
## Histidine                     364.232340   180.472333  292.637532  40.085486
## Uracil                         37.513617    32.493333   35.557662  39.174458
## Glycine                      1069.377872   585.149333  880.717403  37.753813
## 3-Hydroxybutyrate              29.260638     9.898667   21.717013  34.620652
## 3-Hydroxyisovalerate           27.606383    12.312667   21.647792  34.011973
## Acetate                        85.632979    35.604667   66.141429  33.488539
## 4-Hydroxyphenylacetate        119.822553    99.798667  112.021039  33.419405
## myo-Inositol                  181.837660    62.641333  135.397532  32.567554
## Pyruvate                       26.865532    12.566333   21.294416  29.607625
## Lysine                        121.282340    89.229333  108.794156  29.314284
## Hippurate                    2875.729574  1364.240333 2286.837662  28.826437
## Formate                       187.564468    84.483333  147.402987  28.648573
## 1,6-Anhydro-beta-D-glucose    128.688936    69.505333  105.630390  28.612486
## cis-Aconitate                 276.025532    91.724000  204.219740  27.616880
## Taurine                       655.720000   320.522333  525.123506  27.193100
## Succinate                      79.628936    29.836000   60.229091  24.236966
## Trimethylamine N-oxide        820.340638   388.669000  652.156883  23.010912
## Trigonelline                  359.637660   130.687000  270.436104  22.529110
## Methylguanidine                17.364681    12.128333   15.324545  22.429370
## Carnitine                      64.622128    32.443667   52.085065  21.662941
## pi-Methylhistidine            441.553191   258.640000  370.288312  20.356000
## 2-Aminobutyrate                23.669149     9.528333   18.159740  19.570994
## O-Acetylcarnitine              25.564468    10.598000   19.733377  14.769399
## Fumarate                       10.921915     4.552000    8.440130  13.782101
## Adipate                        34.817872     8.993333   24.756364  12.299719
## 1-Methylnicotinamide           70.564255    73.155000   71.573636  11.074201
## Creatine                      174.913404    51.504333  126.831948  10.655932
## Pantothenate                   39.944043    52.622667   44.883766  10.485120
## Acetone                        13.346383     8.420000   11.427013   9.177920
## Glucose                       827.218936   140.958000  559.844545   9.022591
## Sucrose                       150.024468    55.579667  113.227792   8.825967
## 2-Oxoglutarate                183.110426    85.517333  145.087143   7.779821
## 3-Aminoisobutyrate            100.274681    39.911000   76.756364   7.280801
## Xylose                        129.289149    56.509333  100.933377   7.162336
## Lactate                       217.631915    65.748333  158.456494   6.860428
## Tartrate                       47.234681    28.676000   40.004026   6.063205
##                                 P.Value    adj.P.Val
## 2-Hydroxyisobutyrate       3.112959e-22 1.961164e-20
## Creatinine                 1.400976e-20 4.413076e-19
## Quinolinate                3.481955e-20 7.312105e-19
## Valine                     2.252837e-18 3.548218e-17
## Dimethylamine              1.172135e-17 1.476890e-16
## Tryptophan                 3.564558e-17 3.742786e-16
## Methylamine                5.985424e-17 5.386882e-16
## Pyroglutamate              1.072912e-16 7.589719e-16
## Leucine                    1.084246e-16 7.589719e-16
## Asparagine                 2.612755e-16 1.646035e-15
## N,N-Dimethylglycine        4.120067e-16 2.359675e-15
## tau-Methylhistidine        5.197250e-16 2.728556e-15
## Threonine                  1.112844e-15 5.173767e-15
## Betaine                    1.149726e-15 5.173767e-15
## Alanine                    1.253462e-15 5.264542e-15
## 3-Indoxylsulfate           1.362272e-15 5.363946e-15
## Fucose                     1.617376e-15 5.769520e-15
## Glutamine                  1.648434e-15 5.769520e-15
## Serine                     3.791818e-15 1.257287e-14
## Ethanolamine               6.246496e-15 1.967646e-14
## Citrate                    3.916828e-14 1.175048e-13
## Glycolate                  9.866970e-14 2.825541e-13
## trans-Aconitate            1.098708e-13 3.009504e-13
## Hypoxanthine               1.752710e-13 4.600864e-13
## Guanidoacetate             2.401729e-13 6.052356e-13
## Tyrosine                   2.511910e-13 6.086552e-13
## Isoleucine                 8.782360e-13 2.049217e-12
## Histidine                  1.329937e-12 2.992358e-12
## Uracil                     2.075990e-12 4.509909e-12
## Glycine                    4.201570e-12 8.823297e-12
## 3-Hydroxybutyrate          2.087031e-11 4.241385e-11
## 3-Hydroxyisovalerate       2.872418e-11 5.655073e-11
## Acetate                    3.788622e-11 7.232824e-11
## 4-Hydroxyphenylacetate     3.930314e-11 7.282641e-11
## myo-Inositol               6.197112e-11 1.115480e-10
## Pyruvate                   3.150939e-10 5.514144e-10
## Lysine                     3.716309e-10 6.327769e-10
## Hippurate                  4.897819e-10 8.120069e-10
## Formate                    5.419160e-10 8.712437e-10
## 1,6-Anhydro-beta-D-glucose 5.531706e-10 8.712437e-10
## cis-Aconitate              9.796718e-10 1.505349e-09
## Taurine                    1.252803e-09 1.879204e-09
## Succinate                  7.291636e-09 1.068309e-08
## Trimethylamine N-oxide     1.551746e-08 2.221818e-08
## Trigonelline               2.096638e-08 2.935294e-08
## Methylguanidine            2.232087e-08 3.056989e-08
## Carnitine                  3.623601e-08 4.857167e-08
## pi-Methylhistidine         8.400344e-08 1.102545e-07
## 2-Aminobutyrate            1.404706e-07 1.806051e-07
## O-Acetylcarnitine          3.832963e-06 4.829533e-06
## Fumarate                   7.852109e-06 9.699664e-06
## Adipate                    2.365777e-05 2.866230e-05
## 1-Methylnicotinamide       6.036273e-05 7.175193e-05
## Creatine                   8.354921e-05 9.747408e-05
## Pantothenate               9.548709e-05 1.093761e-04
## Acetone                    2.696052e-04 3.033059e-04
## Glucose                    3.055788e-04 3.377450e-04
## Sucrose                    3.582931e-04 3.891804e-04
## 2-Oxoglutarate             8.451408e-04 9.024385e-04
## 3-Aminoisobutyrate         1.281514e-03 1.345590e-03
## Xylose                     1.415587e-03 1.461999e-03
## Lactate                    1.826335e-03 1.855792e-03
## Tartrate                   3.608916e-03 3.608916e-03

save(res, file="data/Cachexia_topTable.Rda")
write.table(rownames(res), file="data/cachexia_ids.txt", row.names = FALSE, quote=FALSE)

2.2 Selected Metabolites and its identifiers

Metabolites in the table are identified by their common names, a type of identifier very useful for scientists working with these metabolites at chemical or nutritional level. However, most enrichment analysis tools require a more objective ID from public databases such as PubChem, HumanDB or KEGG. The associated identifiers for these metabolites, when available, have been obtained using MetaboAnalyst Compound ID converter.

cachexia_map <- read.csv("data/Cachexia_map.csv")
head(cachexia_map)

The obtained identifiers may be used as input for enrichment analysis.

3 The ST000291 dataset

This dataset comes from a study that has bee uploaded to Metabolomics Workbench with ID ST0000291: LC-MS Based Approaches to Investigate Metabolomic Differences in the Urine of Young Women after Drinking Cranberry Juice or Apple Juice

Once downloaded the data consists of three separate files

The features data
- 1541 variables, 45 samples
The metadata
- 45 rows, two columns (sample name, group name)

The metabolite names - 1541 row, 3 columns (original name, PubChem ID and KEGG ID)

Indeed the downloaded data are stored in a particular format “MWTab” so some functions to extract data from it have been prepared.

WARNING

Although the original study followed a 2×2 crossover design, the Metabolomics Workbench dataset (ST000291) does not include subject identifiers, and therefore paired or repeated-measures models cannot be applied. As a result, treatment groups must be analysed as independent samples.

This choice is conservative: ignoring the within-subject pairing increases residual variance and therefore raises the probability of false negatives, not false positives. Consequently, any metabolites that remain significant under this independent-samples analysis are likely to represent strong and biologically robust treatment effects.

3.1 Data loading and preprocessing

Data in Metabolomics Workbench is not preprocessed so some work has to be performed before they can be analyzed.

The preprocessing steps are performed using the POMA Bioconductor package.

data_negative_mode <- do_query(
  context = "study",
  input_item = "analysis_id",
  input_value = "AN000465",
  output_item = "SummarizedExperiment")

data_positive_mode <- do_query(
  context = "study",
  input_item = "analysis_id",
  input_value = "AN000464",
  output_item = "SummarizedExperiment")

## ----metabolitenames, message = FALSE, warning = FALSE, comment = FALSE, echo = FALSE, fig.align = "center", out.width = "100%", fig.cap = 'Metabolite identifiers of the ST000291 Metabolomics Workbench study.'----
# knitr::include_graphics("figure/ST000291_names.png")

## ----warning = FALSE, message = FALSE, comment = FALSE------------------------
metaboliteNamesURL <- "https://www.metabolomicsworkbench.org/data/show_metabolites_by_study.php?STUDY_ID=ST000291&SEARCH_TYPE=KNOWN&STUDY_TYPE=MS&RESULT_TYPE=1"

metaboliteNames <- metaboliteNamesURL %>% 
  read_html() %>% 
  html_nodes(".datatable")

metaboliteNames_negative <- metaboliteNames %>%
  .[[1]] %>%
  html_table() %>%
  dplyr::select(`Metabolite Name`, PubChemCompound_ID, `Kegg Id`)

metaboliteNames_positive <- metaboliteNames %>%
  .[[2]] %>%
  html_table() %>%
  dplyr::select(`Metabolite Name`, PubChemCompound_ID, `Kegg Id`)

metaboliteNames <- bind_rows(metaboliteNames_negative, metaboliteNames_positive) %>%
  dplyr::rename(names = 1, PubChem = 2, KEGG = 3) %>%
  mutate(KEGG = ifelse(KEGG == "-", "UNKNOWN", KEGG),
         PubChem = ifelse(PubChem == "-", "UNKNOWN", PubChem)) %>%
  filter(!duplicated(PubChem))

## ----warning = FALSE, message = FALSE, comment = FALSE------------------------
## negative mode features 

features_negative <- assay(data_negative_mode) %>%
  dplyr::slice(-n())
rownames(features_negative) <- rowData(data_negative_mode)$metabolite[1:(length(rowData(data_negative_mode)$metabolite)-1)]

## positive mode features
features_positive <- assay(data_positive_mode) %>%
  dplyr::slice(-n())
rownames(features_positive) <- rowData(data_positive_mode)$metabolite[1:(length(rowData(data_positive_mode)$metabolite)-1)]

## combine positive and negative mode and set PubChem IDs as feature names
features <- bind_rows(features_negative, features_positive) %>%
  tibble::rownames_to_column("names") %>%
  right_join(metaboliteNames, by = "names") %>%
  select(-names, -KEGG) %>%
  tibble::column_to_rownames("PubChem")

## metadata
pdata <- colData(data_negative_mode) %>% # or "data_positive_mode". They are equal
  as.data.frame() %>%
  tibble::rownames_to_column("ID") %>%
  mutate(Treatment = case_when(Treatment == "Baseline urine" ~ "Baseline",
                               Treatment == "Urine after drinking apple juice" ~ "Apple",
                               Treatment == "Urine after drinking cranberry juice" ~ "Cranberry"))

## ----warning = FALSE, message = FALSE, comment = FALSE------------------------
pdata$Treatment <- as.character(pdata$Treatment)
pdata$Treatment <- factor(pdata$Treatment,
                          levels = c("Baseline", "Apple", "Cranberry"))

data_sumexp <- PomaCreateObject(metadata = pdata, features = t(features))


data_preprocessed <- data_sumexp %>%
  PomaImpute() %>%             # sense group_by
  PomaNorm()

## 182 features removed.

The normalized, imputed feature matrix is now:

expr_all <- assay(data_preprocessed)              # rows: features (PubChem), cols: samples
sample_metadata <- colData(data_preprocessed) %>%
  as.data.frame()

dim(expr_all)

## [1] 1359   45

head(expr_all[, 1:5])

##                  b1        b10       b11        b12        b13
## X443489   0.6947997  0.4887623 0.2874114  0.6073592 -0.8454956
## X107754   1.2285717  1.0538299 2.0280299 -1.6045545 -0.3914194
## X9543071  0.4526208  0.1373599 6.0661877  0.3424920  0.1880878
## X11011465 0.3876013 -1.1626013 1.0143621  0.6077095  0.2879577
## X5281160  0.8226910 -0.5901961 3.0649147 -0.3466212  0.4142565
## X440341   0.4704552  1.3992524 1.3624330  0.6902061  1.9507883

3.2 Selection of differential metabolites

We focus on Baseline vs Cranberry, ignoring Apple for the main comparison.

limma_resCB <- data_preprocessed %>%
  PomaLimma(contrast = "Cranberry-Baseline", adjust = "fdr") %>%
  dplyr::rename(PubChemCID = feature) %>% 
  dplyr::mutate(PubChemCID = gsub("X", "", PubChemCID)) %>%
  filter(PubChemCID != "UNKNOWN")

# show the first 10 features
limma_resCB %>%
  dplyr::slice(1L:10L) %>%
  kbl(row.names = FALSE, booktabs = TRUE) %>%
  kable_styling(latex_options = c("striped"))

PubChemCID	log2FC	t	pvalue	adj_pvalue	B
54678503	2.916890	6.446862	1.0e-07	0.0000847	8.108057
71485	3.267987	5.853017	5.0e-07	0.0003289	6.194381
94214	2.180373	5.691638	8.0e-07	0.0003817	5.676529
5378303	2.414795	5.543843	1.4e-06	0.0004030	5.203836
3037	3.179669	5.463171	1.8e-06	0.0004030	4.946595
10178	3.331534	5.454885	1.9e-06	0.0004030	4.920207
443071	3.615367	5.402912	2.3e-06	0.0004030	4.754851
5486800	-1.784212	-5.388792	2.4e-06	0.0004030	4.709975
17531	2.262358	5.318449	3.0e-06	0.0004550	4.486739
3035199	1.970513	5.048492	7.5e-06	0.0010196	3.635733

# save(limma_res, file="data/MW_ST000291-limma_res.Rda")

3.3 Restictring the analysis to metabolites with HMDB_ID

In order to verify some calculations we wish to compare the results with those obtained with MetaboAnalyst. This requires metabolites to have either KEGG or HMDB identifiers.

Pubchem IDs,converted to other systems are available from file MW_ST000291__map.csv

MW_map <- readr::read_csv("data/MW_ST000291__map.csv", show_col_types = FALSE) %>%
  janitor::clean_names() %>%
  mutate(
    PubChemCID = as.character(query),  # alinyem nom
    HMDB = ifelse(hmdb == "" | is.na(hmdb), NA, hmdb)
  ) %>%
  select(PubChemCID, HMDB)

head(MW_map)

First we filter out all metabolites not mapped to HMDB

expr_all <- assay(data_preprocessed)              # rows = PubChem, cols = samples
metadata <- colData(data_preprocessed) %>% as.data.frame()

pubchem_ids <- gsub("^X", "", rownames(expr_all))
map_expr <- tibble(
  PubChemCID = pubchem_ids
) %>%
  left_join(MW_map, by = "PubChemCID")

keep <- !is.na(map_expr$HMDB)
data_preprocessed_HMDB <- data_preprocessed[keep, ]
head(assay(data_preprocessed_HMDB))

##                   b1        b10         b11        b12         b13        b14
## X1493     -0.3857251  0.5896296  0.54742922  0.1164362 -0.47291789 -0.4337673
## X439287    0.8480117  0.6384995  1.19732710  0.9622080  0.33633347  1.0182369
## X11954071  1.7701857 -1.8913284 -0.25736257  1.9016509 -2.07200711  1.0347154
## X955       0.1893927  0.2560652  0.45048446 -0.7754898  3.13547259  0.1031689
## X439435    0.5253812  0.5872576  1.11729290  1.5091126  0.37094978  0.2745189
## X5280417   0.5115108  0.3041352  0.07776652  0.8087114  0.02411856  0.2849774
##                   b15        b16          b17         b2         b4          b6
## X1493     -0.50339528 -0.5980115 -1.264429067 1.02806969  1.7935068 -1.06711263
## X439287    1.14709740 -1.1610238 -0.530515703 1.05994669  0.3079829 -0.57782609
## X11954071 -0.81381238 -0.1129643 -6.590653562 0.38510617  2.8580052 -0.18370515
## X955      -0.41612685 -1.9867349 -0.005223076 0.09744963 -0.4141981  0.02609954
## X439435    0.69287277 -0.3734512 -0.943917161 0.33415231  0.8340301  0.16946371
## X5280417   0.07079359 -1.2176732 -1.986746348 0.55775455 -0.6177386 -0.22425248
##                   b7         b8         b9          a1         a10         a11
## X1493     -1.4476851 -0.2204205 -1.5104792  0.65170919 -0.87747074  1.39380953
## X439287   -0.5268649  0.9331002 -1.0162866 -1.60633724 -0.45853608 -0.12101487
## X11954071  0.9241943  1.4818335  0.6469588 -1.29141809 -1.54597449  2.44324527
## X955      -0.3763935  0.4011452 -0.4778704 -0.22378193  0.18099625 -0.08748566
## X439435   -0.3964235  0.2480054 -0.8538239 -1.96555512 -0.06216092  0.86879922
## X5280417  -0.9627714 -0.5529711 -0.8311039 -0.08952971  2.97574517  3.17851086
##                  a12        a13        a14        a15        a16         a17
## X1493      2.1569911 -1.7455285 -0.6923914  0.2035022 -0.5004495  0.82366064
## X439287    1.3242346 -0.5063997 -0.4415263  0.6427600 -2.1637777 -1.57477381
## X11954071  0.4952610  1.7717785 -2.7647214 -1.0158740 -0.5289327  0.02863751
## X955      -0.4799279  0.4663974 -0.5772599 -0.8356482 -2.9098726 -0.34177311
## X439435    1.7223901  0.3047137 -0.7981083  0.1694637 -1.0097961 -1.76604313
## X5280417   1.0528224 -0.5122198 -0.1817010  0.2425141 -0.8235913 -0.24878124
##                   a2          a4         a6          a7         a8         a9
## X1493      1.5290714 -0.03154813 -0.9065260  0.05353628  1.3642407 -0.4902117
## X439287    1.3672212  0.54039379 -0.6789370 -0.02407401 -0.5244376 -0.2179557
## X11954071  2.0362327  0.80543563 -2.5731447  1.05936330  2.3560964  1.4571594
## X955      -0.2997101  0.27541056  0.3402795 -0.24953423  0.1312785 -0.3970065
## X439435    0.7141060  0.34654658 -0.7765412  0.01660204  0.1646760 -1.6639785
## X5280417   0.2625771 -0.82640131 -0.7717874  0.32024119  3.3518645 -0.9837162
##                    c1          c10        c11        c12         c13
## X1493      0.45684713  0.224058670  0.1895424  1.6307984 -1.88322973
## X439287    0.55888579  0.432807707 -0.1640363  1.4969648 -0.51118131
## X11954071 -1.89249794 -1.101837458  1.2320961 -0.2896689  0.86798501
## X955      -0.08610293  0.450484459  0.5276003  0.4504845  2.76915890
## X439435   -0.73455888 -0.005368297  0.6170156  1.1856602  0.54637070
## X5280417   0.10175942  0.413717719 -0.2291056 -0.8245270  0.09155411
##                   c14        c15        c16         c17          c2          c4
## X1493      1.05632816 -1.0861729 -0.1924996  0.85524923  1.03947573 -1.41584273
## X439287    0.95258966  0.4681795  0.8409154 -0.52080656 -0.05545967 -1.00316823
## X11954071 -2.47134436 -0.2085014 -0.9126684 -0.28447523  1.37902016  0.52460407
## X955       2.45478738  0.0695257 -1.6123407 -0.60726340  0.13238643  0.17994164
## X439435   -0.03342162  1.0078244 -0.1279754 -0.96630600  0.49679919 -0.37345123
## X5280417   0.85717358  0.3744679 -1.7591859 -0.03289151  0.14147323  0.06376681
##                   c6         c7         c8         c9
## X1493      1.1077750 -0.8492909  1.4392574 -1.6758193
## X439287   -0.4827946 -1.3680218 -0.4619694 -0.3759711
## X11954071  0.6682649 -2.5308987  1.3248235  1.8811378
## X955       0.7755421 -0.4141981 -0.7480831  0.4584730
## X439435    0.4675047 -0.2858048 -1.2779528 -0.8768713
## X5280417  -0.3596963 -1.8702919  0.4490377 -0.6103103

hmdb_ids <- map_expr$HMDB[keep]
rownames(data_preprocessed)[keep] <- hmdb_ids
data_preprocessed_HMDB <- data_preprocessed[keep, ]
head(assay(data_preprocessed_HMDB))

##                     b1        b10         b11        b12         b13        b14
## HMDB0245462 -0.3857251  0.5896296  0.54742922  0.1164362 -0.47291789 -0.4337673
## HMDB0001351  0.8480117  0.6384995  1.19732710  0.9622080  0.33633347  1.0182369
## HMDB0062198  1.7701857 -1.8913284 -0.25736257  1.9016509 -2.07200711  1.0347154
## HMDB0304087  0.1893927  0.2560652  0.45048446 -0.7754898  3.13547259  0.1031689
## HMDB0003503  0.5253812  0.5872576  1.11729290  1.5091126  0.37094978  0.2745189
## HMDB0029263  0.5115108  0.3041352  0.07776652  0.8087114  0.02411856  0.2849774
##                     b15        b16          b17         b2         b4
## HMDB0245462 -0.50339528 -0.5980115 -1.264429067 1.02806969  1.7935068
## HMDB0001351  1.14709740 -1.1610238 -0.530515703 1.05994669  0.3079829
## HMDB0062198 -0.81381238 -0.1129643 -6.590653562 0.38510617  2.8580052
## HMDB0304087 -0.41612685 -1.9867349 -0.005223076 0.09744963 -0.4141981
## HMDB0003503  0.69287277 -0.3734512 -0.943917161 0.33415231  0.8340301
## HMDB0029263  0.07079359 -1.2176732 -1.986746348 0.55775455 -0.6177386
##                      b6         b7         b8         b9          a1
## HMDB0245462 -1.06711263 -1.4476851 -0.2204205 -1.5104792  0.65170919
## HMDB0001351 -0.57782609 -0.5268649  0.9331002 -1.0162866 -1.60633724
## HMDB0062198 -0.18370515  0.9241943  1.4818335  0.6469588 -1.29141809
## HMDB0304087  0.02609954 -0.3763935  0.4011452 -0.4778704 -0.22378193
## HMDB0003503  0.16946371 -0.3964235  0.2480054 -0.8538239 -1.96555512
## HMDB0029263 -0.22425248 -0.9627714 -0.5529711 -0.8311039 -0.08952971
##                     a10         a11        a12        a13        a14        a15
## HMDB0245462 -0.87747074  1.39380953  2.1569911 -1.7455285 -0.6923914  0.2035022
## HMDB0001351 -0.45853608 -0.12101487  1.3242346 -0.5063997 -0.4415263  0.6427600
## HMDB0062198 -1.54597449  2.44324527  0.4952610  1.7717785 -2.7647214 -1.0158740
## HMDB0304087  0.18099625 -0.08748566 -0.4799279  0.4663974 -0.5772599 -0.8356482
## HMDB0003503 -0.06216092  0.86879922  1.7223901  0.3047137 -0.7981083  0.1694637
## HMDB0029263  2.97574517  3.17851086  1.0528224 -0.5122198 -0.1817010  0.2425141
##                    a16         a17         a2          a4         a6
## HMDB0245462 -0.5004495  0.82366064  1.5290714 -0.03154813 -0.9065260
## HMDB0001351 -2.1637777 -1.57477381  1.3672212  0.54039379 -0.6789370
## HMDB0062198 -0.5289327  0.02863751  2.0362327  0.80543563 -2.5731447
## HMDB0304087 -2.9098726 -0.34177311 -0.2997101  0.27541056  0.3402795
## HMDB0003503 -1.0097961 -1.76604313  0.7141060  0.34654658 -0.7765412
## HMDB0029263 -0.8235913 -0.24878124  0.2625771 -0.82640131 -0.7717874
##                      a7         a8         a9          c1          c10
## HMDB0245462  0.05353628  1.3642407 -0.4902117  0.45684713  0.224058670
## HMDB0001351 -0.02407401 -0.5244376 -0.2179557  0.55888579  0.432807707
## HMDB0062198  1.05936330  2.3560964  1.4571594 -1.89249794 -1.101837458
## HMDB0304087 -0.24953423  0.1312785 -0.3970065 -0.08610293  0.450484459
## HMDB0003503  0.01660204  0.1646760 -1.6639785 -0.73455888 -0.005368297
## HMDB0029263  0.32024119  3.3518645 -0.9837162  0.10175942  0.413717719
##                    c11        c12         c13         c14        c15        c16
## HMDB0245462  0.1895424  1.6307984 -1.88322973  1.05632816 -1.0861729 -0.1924996
## HMDB0001351 -0.1640363  1.4969648 -0.51118131  0.95258966  0.4681795  0.8409154
## HMDB0062198  1.2320961 -0.2896689  0.86798501 -2.47134436 -0.2085014 -0.9126684
## HMDB0304087  0.5276003  0.4504845  2.76915890  2.45478738  0.0695257 -1.6123407
## HMDB0003503  0.6170156  1.1856602  0.54637070 -0.03342162  1.0078244 -0.1279754
## HMDB0029263 -0.2291056 -0.8245270  0.09155411  0.85717358  0.3744679 -1.7591859
##                     c17          c2          c4         c6         c7
## HMDB0245462  0.85524923  1.03947573 -1.41584273  1.1077750 -0.8492909
## HMDB0001351 -0.52080656 -0.05545967 -1.00316823 -0.4827946 -1.3680218
## HMDB0062198 -0.28447523  1.37902016  0.52460407  0.6682649 -2.5308987
## HMDB0304087 -0.60726340  0.13238643  0.17994164  0.7755421 -0.4141981
## HMDB0003503 -0.96630600  0.49679919 -0.37345123  0.4675047 -0.2858048
## HMDB0029263 -0.03289151  0.14147323  0.06376681 -0.3596963 -1.8702919
##                     c8         c9
## HMDB0245462  1.4392574 -1.6758193
## HMDB0001351 -0.4619694 -0.3759711
## HMDB0062198  1.3248235  1.8811378
## HMDB0304087 -0.7480831  0.4584730
## HMDB0003503 -1.2779528 -0.8768713
## HMDB0029263  0.4490377 -0.6103103

Now the analysis can be reproduced

limma_HMDB_CB <- data_preprocessed_HMDB %>%
  PomaLimma(contrast = "Cranberry-Baseline", adjust = "fdr")
head(limma_HMDB_CB)

background_HMDB <- limma_HMDB_CB$feature

selected_HMDB <- limma_HMDB_CB %>%
  filter(adj_pvalue < 0.25) %>%
  pull(feature)

The topTables are saved to be used later:

save(limma_resCB, limma_HMDB_CB, file="data/MW_ST000291-limma_res.Rda")

We remove unnecessary packages used onlñy in this section

detach(package:POMA)
detach(package:SummarizedExperiment)

4 `enrichmet` package exmple data

The enrichmetpackage has deposited its example data in Zotero. For reproducibility we download the data directly from there:

To run the enrichmet package, you need to provide at least the following three files:

PathwayVsMetabolites
This file defines the mapping between metabolic pathways and their associated metabolites, serving as the background reference for Fisher’s exact test–based enrichment analysis (see section 2.4).

The mappings are derived from the KEGG database (https://www.kegg.jp/), accessed programmatically via the KEGGREST package. To ensure reproducibility and minimize run-time delays, curated human-specific pathway–metabolite mappings are periodically updated and archived on Zenodo [1]. We provide a human-specific PathwayVsMetabolites file available here:
Human-PathwaysVsMetabolites.csv
However, users can supply a file tailored to their species of interest.

InputMetabolites
A list of metabolites from the user’s experimental dataset for analysis, typically consisting of metabolite identifiers (e.g., significant metabolites from differential analysis (see section 2.4)). This input is not provided by the package and must be prepared by the user. The file serves as the query set for enrichment analysis, and its identifiers should match those used in the background pathway–metabolite mappings (e.g., KEGG compound IDs).

For demonstration purposes, we also include summary statistics derived from an example experimental dataset. These example data are intended solely to showcase the package workflow and will be described in detail in a subsequent publication.

SummaryStatistics
The enrichment analysis requires statistical results associated with the input metabolites, such as fold changes, p-values, or other relevant effect size metrics (see section 2.4). These values guide the enrichment procedure by reflecting both the significance and the magnitude of change for each metabolite.

This information is not generated by the package and must be provided by the user. Summary statistics may be supplied either as a standalone data frame or extracted directly from the rowData() of a SummarizedExperiment object, provided that appropriate columns are available for metabolite identifiers, p-values, and fold changes (see section 2.4 example_data).

For demonstration purposes, the package includes an example summary statistics dataset. These example data are provided solely to illustrate the workflow and will be described in detail in a forthcoming publication.

The enrichmet() function integrates both p-values and fold changes when performing MetSEA. By considering the direction and magnitude of changes, this approach yields richer biological insights than methods based only on binary classifications of “significant” versus “not significant.”

4.1 Optional Input Files

Including these optional files can enhance the analysis and its biological interpretation:

KEGGLookup
A reference file linking KEGG compound IDs (https://www.kegg.jp/) to metabolite names or other annotations, which helps standardize identifiers and improve pathway mapping [1]. We provide a KEGGLookup file here:
kegg_lookup.xlsx
Users may also provide a lookup file specific to their species.
STITCHInteractions
A dataset of known or predicted interactions between metabolites or proteins, obtained from the freely available STITCH database (http://stitch.embl.de/ ). These interactions are useful for constructing networks to support pathway analysis [2]. We supply an example file here:
chemical_chemical.tsv
Users may provide their own interaction data appropriate for their species.
mapping_df
A mapping data frame that links STITCH identifiers with metabolite names, facilitating integration between datasets [2]. The provided example file is available here:
mapping_df.xlsx
Users can substitute this with a mapping file relevant to their species of interest.

4.2 Case study of KrasG12D mutation

We used summary statistics from differential analyses based on metabolomic measurements comparing KrasG12D mutations with and without TAp73 deletion. The top 50 metabolites, ranked by significance, were selected for pathway enrichment analysis using Fisher’s Exact Test. These summary statistics were also utilized for MetSEA analysis.

# InputMetabolites: A vector or list containing the metabolites of interest.
inputMetabolites <- c(
  "C02721", "C03736", "C03139", "C00074", "C02862", "C00092", "C00275",
  "C00386", "C10172", "C00361", "C00242", "C19806", "C16358", "C16357",
  "C16353", "C02140", "C05488", "C01181", "C03017", "C02632", "C00246",
  "C00079", "C08317", "C00588", "C16527", "C00065", "C00249", "C00836",
  "C02990", "C03690", "C04025", "C06424", "C00123", "C00606", "C03067",
  "C00539", "C00633", "C00519", "C00366", "C00148", "C00219", "C08261",
  "C00385", "C06429", "C05637", "C00295", "C06428", "C16513", "C01909",
  "C03299", "C01530", "C01571", "C08278", "C00599", "C15587", "C00144",
  "C03739"
)

# Standard cache location
cache_dir <- tools::R_user_dir("enrichmet", "cache")
bfc <- BiocFileCache(cache_dir, ask = FALSE)

# Helper function to download & cache any file from Zenodo
get_cached_file <- function(url) {
  bfcrpath(bfc, url)
}

# -------------------------
# Load files with caching
# -------------------------

# PathwayVsMetabolites
pathway_url <- "https://zenodo.org/api/records/15498097/files/Human-PathwaysVsMetabolites.csv/content"
pathway_path <- get_cached_file(pathway_url)
PathwayVsMetabolites <- read.csv(pathway_path)
str(PathwayVsMetabolites)

## 'data.frame':    351 obs. of  2 variables:
##  $ Pathway    : chr  "Glycolysis / Gluconeogenesis" "Citrate cycle (TCA cycle)" "Pentose phosphate pathway" "Pentose and glucuronate interconversions" ...
##  $ Metabolites: chr  "C00022,C00024,C00031,C00033,C00036,C00068,C00074,C00084,C00085,C00103,C00111,C00118,C00186,C00197,C00221,C00236"| __truncated__ "C00022,C00024,C00026,C00036,C00042,C00068,C00074,C00091,C00122,C00149,C00158,C00311,C00417,C05125,C05379,C05381"| __truncated__ "C00022,C00031,C00085,C00117,C00118,C00119,C00121,C00197,C00198,C00199,C00204,C00221,C00231,C00257,C00258,C00279"| __truncated__ "C00022,C00026,C00029,C00103,C00111,C00116,C00167,C00181,C00191,C00199,C00204,C00216,C00231,C00259,C00266,C00309"| __truncated__ ...

# SummaryStatistics
example_url <- "https://zenodo.org/api/records/15498097/files/example_data.xlsx/content"
example_path <- get_cached_file(example_url)
example_data <- read_excel(example_path)
show(example_data[1:3,])

## # A tibble: 3 × 4
##   met_id      pval    padj log2fc
##   <chr>      <dbl>   <dbl>  <dbl>
## 1 C02721 0.0000124 0.00315  -2.32
## 2 C03736 0.0000694 0.00739   2.63
## 3 C03139 0.0000869 0.00739   1.75

# KEGGLookup
kegg_url <- "https://zenodo.org/api/records/15498097/files/kegg_lookup.xlsx/content"
kegg_path <- get_cached_file(kegg_url)
kegg_lookup <- read_excel(kegg_path)
head(kegg_lookup)

# Mapping_df
mapping_url <- "https://zenodo.org/api/records/15498097/files/mapping_df.xlsx/content"
mapping_path <- get_cached_file(mapping_url)
mapping_df <- read_excel(mapping_path)
head(mapping_df)

# STITCHInteractions
# stitch_url <- "https://zenodo.org/records/15498097/files/chemical_chemical.tsv"
# stitch_path <- get_cached_file(stitch_url)
# stitch_df <- read_tsv(stitch_path)
# head(stitch_df)

The data from “EnrichMetExample” is not an EnrichmentSet, so a new one is created from the PathwayvsMetabolites data.frame:

First we prepare the long version of PathwayVsMetabolites:

tmp <- PathwayVsMetabolites %>%
  mutate(Metabolites = strsplit(Metabolites, ","))

unnested <- tidyr::unnest(tmp, cols = c(Metabolites))

PathwayVsMetabolites_long <- dplyr::rename(unnested, kegg_id = Metabolites)

head(PathwayVsMetabolites_long)

PathwayVsMetabolites_long <- PathwayVsMetabolites_long %>%
  mutate(PathwayID = paste0("PW_", as.integer(as.factor(Pathway))))
head(PathwayVsMetabolites_long)

PathwayVsMetabolites_long <- PathwayVsMetabolites_long %>%
  left_join(kegg_lookup, by = c("kegg_id" = "kegg_id"))

## Warning in left_join(., kegg_lookup, by = c(kegg_id = "kegg_id")): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 132 of `x` matches multiple rows in `y`.
## ℹ Row 159 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
##   "many-to-many"` to silence this warning.

head(PathwayVsMetabolites_long)

With this we can directly build the EnrichmentSet object:

require(localEnrichment)

## Cargando paquete requerido: localEnrichment

KEGGset_KEGG <- buildEnrichmentSet(
  data = PathwayVsMetabolites_long,
  id_col = "kegg_id",
  category_col = "Pathway",
  set_id_col = "PathwayID",
  set_name = "KEGG_hsa_pathways",
  source = "KEGG",
  species = "Homo sapiens",
  version = Sys.Date(),
  description = "KEGG human pathways mapped to KEGG compound IDs",
  sep = ";"
)

summary(KEGGset_KEGG)

## EnrichmentSet summary:
##   Mapping name: KEGG_hsa_pathways 
##   Source: KEGG 
##   Feature IDs: kegg_id 
##   Number of sets: 351 
##   Mean set size: 16.37607 features
##   Median set size: 6 features

head(KEGGset_KEGG@data)

All the data is saved as a binary file to be used later

EM_inputMetabolites <- inputMetabolites
EM_summaryStats <- example_data
EM_PathwayVsMetabolites<- PathwayVsMetabolites
EM_mapping_df <- mapping_df
# EM_stitch_df <- stitch_df
EM_kegg_lookup <- kegg_lookup
save(EM_inputMetabolites, KEGGset_KEGG, EM_PathwayVsMetabolites, EM_summaryStats, EM_kegg_lookup, EM_mapping_df, file="data/enrichmet_example.Rda")

Benchmarking Enrichment Packages

Data preparation

Alex Sanchez

2025-12-01

1 Introduction

2 The cachexia dataset

2.1 Selection of differentially quantified metabolites

2.2 Selected Metabolites and its identifiers

3 The ST000291 dataset

3.1 Data loading and preprocessing

3.2 Selection of differential metabolites

3.3 Restictring the analysis to metabolites with HMDB_ID

4 `enrichmet` package exmple data

4.1 Optional Input Files

4.2 Case study of KrasG12D mutation

Benchmarking Enrichment Packages

Data preparation

Alex Sanchez

2025-12-01

1 Introduction

2 The cachexia dataset

2.1 Selection of differentially quantified metabolites

2.2 Selected Metabolites and its identifiers

3 The ST000291 dataset

3.1 Data loading and preprocessing

3.2 Selection of differential metabolites

3.3 Restictring the analysis to metabolites with HMDB_ID

4 enrichmet package exmple data

4.1 Optional Input Files

4.2 Case study of KrasG12D mutation

4 `enrichmet` package exmple data