diff --git a/R/schedule.R b/R/schedule.R index 13253c8..dea2475 100644 --- a/R/schedule.R +++ b/R/schedule.R @@ -1,52 +1,144 @@ library(dplyr) library(stringr) library(kableExtra) +library(htmltools) -render_program_schedule <- function(program_csv = "../data/program.csv", full_width = TRUE) { +# ========================================================= +# Helpers +# ========================================================= - # ---- Read + clean program ---- - df <- read.csv(program_csv, stringsAsFactors = FALSE, na.strings = c("", "NA")) |> +fill_presenter <- function(df) { + df |> mutate( - date = as.Date(date), - time = str_trim(time), - type = str_trim(type), - author = str_trim(coalesce(author, "")), - title = str_trim(coalesce(title, "")), - info = str_trim(coalesce(info, "")), - color = str_trim(coalesce(color, "")) + presenter = if_else( + str_trim(coalesce(presenter, "")) == "", + str_trim(vapply( + strsplit(coalesce(authors, ""), ","), + function(x) if (length(x) > 0) x[1] else "", + character(1) + )), + str_trim(coalesce(presenter, "")) + ) + ) +} + +normalize_type <- function(x) { + case_when( + tolower(x) %in% c("keynote", "keynotes") ~ "Keynote", + tolower(x) %in% c("short talk", "short talks") ~ "Short talks", + tolower(x) %in% c("flash talk", "flash talks") ~ "Flash talks", + tolower(x) %in% c("workshop", "workshops") ~ "Workshops", + tolower(x) %in% c("bof", "bof session", "bof sessions") ~ "BoF sessions", + tolower(x) %in% c("poster pitch", "poster pitches") ~ "Poster pitches", + tolower(x) %in% c("poster", "posters", "poster session") ~ "Poster session", + TRUE ~ tools::toTitleCase(x) + ) +} + +make_collapsible_title <- function(title, authors = "", abstract = "") { + title_esc <- htmlEscape(coalesce(title, "")) + authors_esc <- htmlEscape(coalesce(authors, "")) + abstract_esc <- htmlEscape(coalesce(abstract, "")) + + if (isTRUE(abstract == "") || is.na(abstract)) { + return(title_esc) + } + + paste0( + "
", + "", + title_esc, + "", + "
", + if (!isTRUE(authors == "") && !is.na(authors)) { + paste0("
Author(s): ", authors_esc, "
") + } else "", + "
", abstract_esc, "
", + "
", + "
" + ) +} + +# Robust time parser: handles 9:40 and 09:40 +parse_hm <- function(x) { + x <- str_trim(coalesce(x, "")) + x[x == ""] <- NA_character_ + + parts <- strsplit(x, ":", fixed = TRUE) + + out <- vapply(parts, function(p) { + if (length(p) < 2) return(NA_real_) + h <- suppressWarnings(as.numeric(p[1])) + m <- suppressWarnings(as.numeric(p[2])) + if (is.na(h) || is.na(m)) return(NA_real_) + h * 60 + m + }, numeric(1)) + + out +} + +# Normalize session day labels like Wed / Wednesday / Wed. +normalize_day_label <- function(x) { + x |> + str_trim() |> + str_to_lower() |> + str_replace_all("\\.", "") |> + substr(1, 3) +} + +# ========================================================= +# 1) Old/simple schedule from program.csv only +# ========================================================= +render_program_schedule <- function( + program_csv = "../data/program.csv", + full_width = TRUE +) { + + df <- read.csv( + program_csv, + stringsAsFactors = FALSE, + na.strings = c("", "NA") + ) |> + mutate( + date = as.Date(date), + time = str_trim(coalesce(time, "")), + type = str_trim(coalesce(type, "")), + author = str_trim(coalesce(author, "")), + title = str_trim(coalesce(title, "")), + info = str_trim(coalesce(info, "")), + color = str_trim(coalesce(color, "")) ) |> - arrange(date, time) + arrange(date, parse_hm(time)) + + conference_start <- min(df$date, na.rm = TRUE) - # Add day info - conference_start <- df$date |> as.Date() |> min() if (is.na(conference_start)) { stop("date must be in ISO format YYYY-MM-DD, e.g. 2026-06-03.") } - df$day <- (df$date - conference_start)+1 + + df <- df |> + mutate(day = as.integer(date - conference_start) + 1) day_headers_df <- df |> - distinct(day) |> - arrange(day) |> + distinct(day, date) |> + arrange(day, date) |> mutate( - date = conference_start + (day - 1), - header = paste0("Day ", day, " — ", format(date, "%a. %b %d, %Y")) + header = format(date, "%a. - %b. 
%d, '%y") ) day_headers <- day_headers_df$header names(day_headers) <- as.character(day_headers_df$day) - # Output table data - df_out <- df |> select(time, type, author, title) + df_out <- df |> + select(time, type, author, title) - # Row indices by day idx_by_day <- split(seq_len(nrow(df_out)), df$day) - # ---- Build table ---- tbl <- kbl( df_out, escape = TRUE, row.names = FALSE, - col.names = c("Time", "Type", "Author", "Title") + col.names = c("TIME", "TYPE", "AUTHOR", "TITLE") ) |> kable_material(full_width = full_width) |> column_spec(1, width = "12%") |> @@ -54,7 +146,6 @@ render_program_schedule <- function(program_csv = "../data/program.csv", full_wi column_spec(3, width = "28%") |> column_spec(4, width = "42%") - # ---- Apply row colors from program.csv ---- for (i in seq_len(nrow(df))) { bg <- df$color[i] if (!is.na(bg) && nzchar(bg)) { @@ -62,17 +153,372 @@ render_program_schedule <- function(program_csv = "../data/program.csv", full_wi } } - # ---- Group by day (only if that day exists) ---- for (key in names(day_headers)) { - if (!is.null(idx_by_day[[key]])) { + rows_this_day <- idx_by_day[[key]] + if (!is.null(rows_this_day) && length(rows_this_day) > 0) { tbl <- tbl |> - pack_rows( - day_headers[[key]], - min(idx_by_day[[key]]), - max(idx_by_day[[key]]) + pack_rows(day_headers[[key]], min(rows_this_day), max(rows_this_day)) + } + } + + tbl |> cat() +} + +# ========================================================= +# Posters section appended after the combined schedule +# If poster day is missing, render one single table, no day title +# If poster day exists, use program day order +# Author names alphabetical within each day +# ========================================================= +render_posters_section <- function( + sessions, + day_header_map, + full_width = TRUE +) { + posters <- sessions |> + filter(type_norm %in% c("Poster", "Poster session")) + + if (nrow(posters) == 0) { + return(invisible(NULL)) + } + + cat("

POSTERS

\n") + cat("
\n") + cat("

(In alphabetical order.)

\n") + + posters_with_day <- posters |> + filter(!is.na(day) & str_trim(day) != "") + + posters_without_day <- posters |> + filter(is.na(day) | str_trim(day) == "") + + # Undated posters: one single table + if (nrow(posters_without_day) > 0) { + undated_df <- posters_without_day |> + arrange(presenter, title) |> + mutate(idx = seq_len(n())) + + out <- undated_df |> + mutate( + Author = paste0(idx, " ", htmlEscape(presenter)), + Title = mapply( + make_collapsible_title, + title, + authors, + abstract, + USE.NAMES = FALSE ) + ) |> + select(Author, Title) + + tbl_posters <- kbl( + out, + escape = FALSE, + row.names = FALSE, + col.names = c("# AUTHOR", "TITLE") + ) |> + kable_material(full_width = full_width) |> + column_spec(1, width = "28%") |> + column_spec(2, width = "72%") + + tbl_posters |> cat() + } + + # Dated posters: grouped by day in program order + if (nrow(posters_with_day) > 0) { + day_levels <- names(day_header_map) + + posters_with_day <- posters_with_day |> + mutate(day = factor(day, levels = day_levels)) |> + arrange(day, presenter, title) + + for (d in day_levels) { + day_df <- posters_with_day |> + filter(as.character(day) == d) |> + arrange(presenter, title) + + if (nrow(day_df) == 0) next + + day_df <- day_df |> + mutate(idx = seq_len(n())) + + heading <- day_header_map[[d]] + if (is.null(heading) || is.na(heading) || heading == "") { + heading <- d + } + + cat( + paste0( + "

", + htmlEscape(heading), + "

\n" + ) + ) + + out <- day_df |> + mutate( + Author = paste0(idx, " ", htmlEscape(presenter)), + Title = mapply( + make_collapsible_title, + title, + authors, + abstract, + USE.NAMES = FALSE + ) + ) |> + select(Author, Title) + + tbl_posters <- kbl( + out, + escape = FALSE, + row.names = FALSE, + col.names = c("# AUTHOR", "TITLE") + ) |> + kable_material(full_width = full_width) |> + column_spec(1, width = "28%") |> + column_spec(2, width = "72%") + + tbl_posters |> cat() + } + } +} + +# ========================================================= +# 2) Detailed/combined schedule from program.csv + sessions.csv +# ========================================================= +render_detailed_program <- function( + program_csv = "../data/program.csv", + sessions_csv = "../data/sessions.csv", + full_width = TRUE +) { + + # ----------------------------- + # Read overview program + # ----------------------------- + program <- read.csv( + program_csv, + stringsAsFactors = FALSE, + na.strings = c("", "NA") + ) |> + mutate( + date = as.Date(date), + time = str_trim(coalesce(time, "")), + type = str_trim(coalesce(type, "")), + author = str_trim(coalesce(author, "")), + title = str_trim(coalesce(title, "")), + info = str_trim(coalesce(info, "")), + color = str_trim(coalesce(color, "")), + time_min = parse_hm(time), + type_norm = normalize_type(type) + ) |> + arrange(date, time_min) + + conference_start <- min(program$date, na.rm = TRUE) + if (is.na(conference_start)) { + stop("program.csv needs valid ISO dates.") + } + + # ----------------------------- + # Read sessions + # ----------------------------- + sessions <- read.csv( + sessions_csv, + stringsAsFactors = FALSE, + na.strings = c("", "NA") + ) |> + mutate( + day = str_trim(coalesce(day, "")), + time = str_trim(coalesce(time, "")), + type = str_trim(coalesce(type, "")), + title = str_trim(coalesce(title, "")), + authors = str_trim(coalesce(authors, "")), + presenter = str_trim(coalesce(presenter, "")), + abstract = 
str_trim(coalesce(abstract, "")), + time_min = parse_hm(time), + type_norm = normalize_type(type) + ) |> + fill_presenter() + + # ----------------------------- + # Map sessions day names to actual program dates by weekday + # This avoids wrong placement when sessions.csv is mixed + # ----------------------------- + program_day_lookup <- program |> + distinct(date) |> + arrange(date) |> + mutate( + day_key = normalize_day_label(format(date, "%a")) + ) + + sessions <- sessions |> + mutate(day_key = if_else(day == "", NA_character_, normalize_day_label(day))) |> + left_join( + program_day_lookup |> + select(day_key, date), + by = "day_key" + ) + + # ----------------------------- + # Build formatted day headers from program.csv + # ----------------------------- + program <- program |> + mutate(day_num = as.integer(date - conference_start) + 1) + + day_headers_df <- program |> + distinct(day_num, date) |> + arrange(day_num, date) |> + mutate( + header = format(date, "%a. - %b. %d, '%y"), + day_key = normalize_day_label(format(date, "%a")) + ) + + day_headers <- day_headers_df$header + names(day_headers) <- as.character(day_headers_df$day_num) + + # Map raw session day labels to formatted day headers from program.csv + sessions_day_header_map <- sessions |> + filter(!is.na(day), str_trim(day) != "", !is.na(date)) |> + distinct(day, day_key) |> + left_join( + day_headers_df |> + select(day_key, header), + by = "day_key" + ) |> + filter(!is.na(header)) + + day_header_map <- setNames(sessions_day_header_map$header, sessions_day_header_map$day) + + # ----------------------------- + # Compute the end time of each program block + # ----------------------------- + program <- program |> + group_by(date) |> + arrange(time_min, .by_group = TRUE) |> + mutate( + next_time_min = lead(time_min), + block_end_min = if_else(is.na(next_time_min), 24 * 60, next_time_min) + ) |> + ungroup() + + # ----------------------------- + # Build combined schedule rows + # 
----------------------------- + out_rows <- list() + out_bg <- character() + + add_row <- function(time = "", type = "", author = "", title = "", bg = "") { + out_rows[[length(out_rows) + 1]] <<- data.frame( + Time = time, + Type = type, + Author = author, + Title = title, + stringsAsFactors = FALSE + ) + out_bg[length(out_bg) + 1] <<- bg + } + + row_counts_per_program <- integer(nrow(program)) + + for (i in seq_len(nrow(program))) { + pr <- program[i, ] + + add_row( + time = pr$time, + type = pr$type, + author = pr$author, + title = pr$title, + bg = pr$color + ) + + expandable_types <- c( + "Short talks", + "Flash talks", + "Workshops", + "BoF sessions", + "Poster pitches", + "Poster session" + ) + + attached_n <- 0L + + if (pr$type_norm %in% expandable_types) { + ss <- sessions |> + filter( + !is.na(date), + date == pr$date, + type_norm == pr$type_norm, + time_min >= pr$time_min, + time_min < pr$block_end_min + ) |> + arrange(time_min, presenter, title) + + if (nrow(ss) > 0) { + for (j in seq_len(nrow(ss))) { + s <- ss[j, ] + + add_row( + time = "", + type = "", + author = htmlEscape(s$presenter), + title = make_collapsible_title(s$title, s$authors, s$abstract), + bg = pr$color + ) + } + attached_n <- nrow(ss) + } + } + + row_counts_per_program[i] <- 1 + attached_n + } + + df_out <- bind_rows(out_rows) + + # ----------------------------- + # Group rows by day + # ----------------------------- + idx_by_day <- split( + seq_len(nrow(df_out)), + rep(program$day_num, times = row_counts_per_program) + ) + + # ----------------------------- + # Render the combined table + # ----------------------------- + tbl <- kbl( + df_out, + escape = FALSE, + row.names = FALSE, + col.names = c("TIME", "TYPE", "AUTHOR", "TITLE") + ) |> + kable_material(full_width = full_width) |> + column_spec(1, width = "12%") |> + column_spec(2, width = "18%") |> + column_spec(3, width = "26%") |> + column_spec(4, width = "44%") + + for (i in seq_len(nrow(df_out))) { + bg <- out_bg[i] + if 
(!is.na(bg) && nzchar(bg)) { + tbl <- tbl |> row_spec(i, background = bg) + } + } + + for (key in names(day_headers)) { + rows_this_day <- idx_by_day[[key]] + if (!is.null(rows_this_day) && length(rows_this_day) > 0) { + tbl <- tbl |> + pack_rows(day_headers[[key]], min(rows_this_day), max(rows_this_day)) } } tbl |> cat() + + # ----------------------------- + # Render posters after the main schedule + # ----------------------------- + render_posters_section( + sessions = sessions, + day_header_map = day_header_map, + full_width = full_width + ) } diff --git a/data/program.csv b/data/program.csv index b9d258e..8012156 100644 --- a/data/program.csv +++ b/data/program.csv @@ -1,32 +1,32 @@ date,time,type,author,title,info,color -2026-06-03,09:00,Opening,Organizing committee,Welcome to EuroBioC,,#ECEFF1 -2026-06-03,09:30,Keynote,,,,#D1C4E9 -2026-06-03,10:00,Short talks,From submitted abstracts,,,#FFF9C4 -2026-06-03,10:35,Break,,,, -2026-06-03,11:00,Short talks,From submitted abstracts,,,#FFF9C4 -2026-06-03,12:15,Lunch,,,, -2026-06-03,13:30,Workshops,From submitted abstracts,,,#F3E5F5 -2026-06-03,15:00,Break,,,, -2026-06-03,15:15,Poster pitches,From submitted abstracts,,,#E8F5E9 -2026-06-03,16:00,Poster session,From submitted abstracts,,,#E8F5E9 -2026-06-03,18:00,Conference dinner,,,,#E3F2FD -2026-06-04,09:00,Keynote,,,,#D1C4E9 -2026-06-04,09:30,Short talks,From submitted abstracts,,,#FFF9C4 -2026-06-04,10:35,Break,,,, -2026-06-04,11:00,Keynote,,,,#D1C4E9 -2026-06-03,11:30,Short talks,From submitted abstracts,,,#FFF9C4 -2026-06-04,12:15,Lunch,,,, -2026-06-04,13:30,Workshops,From submitted abstracts,,,#F3E5F5 -2026-06-04,15:00,Break,,,, -2026-06-04,15:15,Short talks,From submitted abstracts,,,#FFF9C4 -2026-06-04,15:45,Poster pitches,From submitted abstracts,,,#E8F5E9 -2026-06-04,16:30,Poster session,From submitted abstracts,,,#E8F5E9 -2026-06-04,19:00,Walking tour,,,,#E3F2FD -2026-06-05,09:00,Keynote,,,,#D1C4E9 -2026-06-05,09:30,Short talks,From submitted 
abstracts,,,#FFF9C4 -2026-06-05,09:55,Flash talks,,,,#FFE082 -2026-06-05,10:35,Break,,,, -2026-06-05,11:00,BoF sessions,,,,#E0F7FA -2026-06-05,12:30,Break,,,, -2026-06-05,12:45,Closing,Organizing committee,,,#ECEFF1 -2026-06-05,13:00,Lunch,,,, +2026-06-03,09:00,Opening,Organizing committee,Welcome to EuroBioC,, +2026-06-03,09:30,Keynote,Helena Kilpinen,,,#E3F2FD +2026-06-03,10:00,Short talks,,,, +2026-06-03,10:35,Break,,,,#FFF9C4 +2026-06-03,11:00,Short talks,,,, +2026-06-03,12:15,Lunch,,,,#FFF9C4 +2026-06-03,13:30,Workshops,,,, +2026-06-03,15:00,Break,,,,#FFF9C4 +2026-06-03,15:15,Poster pitches,,,, +2026-06-03,16:00,Poster session,,,, +2026-06-03,18:00,Conference dinner,,,, +2026-06-04,09:00,Keynote,Anders Krogh,,,#E3F2FD +2026-06-04,09:30,Short talks,,,, +2026-06-04,10:35,Break,,,,#FFF9C4 +2026-06-04,11:00,Keynote,Aura Raulo,,,#E3F2FD +2026-06-04,11:30,Short talks,,,, +2026-06-04,12:15,Lunch,,,,#FFF9C4 +2026-06-04,13:30,Workshops,,,, +2026-06-04,15:00,Break,,,,#FFF9C4 +2026-06-04,15:15,Short talks,,,, +2026-06-04,15:45,Poster pitches,,,, +2026-06-04,16:30,Poster session,,,, +2026-06-04,19:00,Walking tour,,,, +2026-06-05,09:00,Keynote,Levi Waldron,,,#E3F2FD +2026-06-05,09:30,Short talks,,,, +2026-06-05,09:55,Flash talks,,,, +2026-06-05,10:35,Break,,,,#FFF9C4 +2026-06-05,11:00,BoF sessions,,,, +2026-06-05,12:30,Break,,,,#FFF9C4 +2026-06-05,12:45,Closing,Organizing committee,,, +2026-06-05,13:00,Lunch,,,,#FFF9C4 diff --git a/data/sessions.csv b/data/sessions.csv new file mode 100644 index 0000000..b922beb --- /dev/null +++ b/data/sessions.csv @@ -0,0 +1,103 @@ +day,time,type,title,authors,presenter,abstract +Wed,13:30,workshop,Analysing ChIP-Seq Data using extraChIPs,Stevie M Pederson,,"The extraChIPs package was developed to enable Differential Signal Analysis for one or more ChIP targets, along with providing multiple utility functions associated with this type of analysis. 
Differential Signal Analysis can be performed using fixed-width regions, in a manner similar to DiffBind1, or using sliding windows as popularised by csaw2, with these methods enhanced and extended by extraChIPs. All approaches utilise existing Bioconductor classes and rely on existing packages, allowing efficient use of computational resources and strong integration with the Bioconductor ecosystem. This workshop will cover both approaches to analysis, highlighting the advantages and disadvantages of each method. A particular focus will be on the decision making process for normalisation and the selection of the window-size most appropriate for your data-type. The integration of two ChIP targets will also be discussed and demonstrated, taking the Estrogen Receptor (ER𝛼) and H3K27ac marks as examples, extending analysis into the incorporation of HiC and enhancer annotations. The workshop will also be suitable for those working with ATAC-Seq, however read counting will only be demonstrated in the context of ChIP-Seq. This session is also a Bioconductor workflow which is currently in preparation. 1. Ross-Innes, C. S., Stark, R., Teschendorff, A. E., Holmes, K. A., Ali, H. R., Dunning, M. J., Brown, G. D., Gojis, O., Ellis, I. O., Green, A. R., Ali, S., Chin, S.-F., Palmieri, C., Caldas, C., and Carroll, J. S. (2012). Differential oestrogen receptor binding is associated with clinical outcome in breast cancer. Nature 481, 389-393 2. Lun, A. T. L., & Smyth, G. K. (2016). csaw: a Bioconductor package for differential binding analysis of ChIP-seq data using sliding windows. Nucleic Acids Research, 44(5), e45. https://doi.org/10.1093/nar/gkv1191" +Wed,13:30,workshop,PSMatch: an R/Bioconductor package to explore proteomics identification data.,"Guillaume Deflandre,Sebastian Gibb,Laurent Gatto",,"Mass spectrometry (MS) is the current state-of-the-art technology to comprehensively study complex proteomes. 
The processing of MS-based proteomics data entails two distinct, albeit related steps, namely the identification of peptides and their quantitation. As the proportion of unidentified spectra in MS-based experiments typically range from 50% to 90% (for single-cell proteomics experiments), dedicated and flexible tools built for handling the actual identified spectra are essential. In this workshop, we will present PSMatch, an R/Bioconductor package designed to handle identification data in the form of peptide-spectrum matches (PSMs) and offering functionalities to streamline exploration and visualisation of PSM data. We will begin by performing a real-time database search using the search engine Sage, after which you will familiarise yourself with PSM data. We will demonstrate how to load PSM data from mzIdentML (mzID) or tabular files, generate theoretical fragment ions, model peptide-protein relations and facilitate various visualisations. We will also present the recent advancements in the package that provide functionality to visualise and validate post-translational modifications (PTMs)." +Wed,13:30,workshop,A standardized R/Bioconductor framework for integrative analysis of histopathological images with multi-omics data,"Ilaria Billato,Marcel Ramos,Eslam Abousamra,Mohamed Omar,Sean Davis,Hervé Pagès,Vincent James Carey,Levi Waldron,Chiara Romualdi,Davide Risso,Sehyun Oh",,"Histopathological images provide unparalleled insights into tissue architecture, cellular morphology, and tumor spatial organization. While these images are routinely used in cancer research and clinical practice, their computational analysis typically relies on specialized software outside the R/Bioconductor ecosystem, which remains the primary environment for high-throughput omics data analysis. This separation represents a major barrier to truly integrative multi-modal studies. 
To address this challenge, we applied a standardized and large-scale image analysis workflow to raw histopathological images and release the resulting pre-computed features as a comprehensive TCGA-derived resource directly usable within R/Bioconductor, in compatible data structures: SpatialExperiment, SpatialFeatureExperiment, and MultiAssayExperiment. Through the imageTCGA package, we provide nuclear segmentations and cell-level annotations generated using HoVer-Net, a state-of-the-art deep learning model for nuclear segmentation and classification. All outputs are organized in standardized data structures fully compatible with existing Bioconductor analytical frameworks. We further extracted image embeddings using the foundation model ProvGigaPath. Embeddings were computed at both the tile level, where whole slide images are divided into smaller regions capturing fine-grained local morphological patterns, and the slide level, providing a global representation of the entire histopathological specimen. We processed the complete collection of diagnostic H&E-stained whole slide images (WSIs) from TCGA, comprising 11,765 images from 9,640 cases across 32 cancer types. This effort required substantial computational resources and extensive data download, preprocessing, and harmonization steps, making the public release of these pre-computed features particularly valuable for the community. Extracted features include nuclei-level information such as spatial coordinates, shape, texture, and cellular classification (e.g., benign, neoplastic, stromal, and necrotic), as well as higher-level features describing tissue organization, cellular composition, and spatial relationships. 
Moreover, these image-derived features are directly integrable with the extensive collection of publicly available TCGA molecular data, including bulk RNA-seq expression profiles, DNA methylation, somatic mutations, copy number alterations, and clinical annotations, enabling comprehensive multi-modal analyses within a unified framework. Package vignettes demonstrate how to apply Principal Pattern Analysis (PPA) for exploratory analysis of image-derived features and how to use MOFA for multi-modal integration of histopathology, genomics, and transcriptomics data. In addition, all data are accessible, downloadable, and interactively explorable through a dedicated Shiny application, enabling visualization of extracted features alongside the original images. Overall, this work bridges a critical gap between histopathological image analysis and multi-omics integration by providing robust tools and a large-scale, ready-to-use feature repository within the R/Bioconductor ecosystem. imageTCGA facilitates the systematic incorporation of spatial and morphological information into cancer studies, supporting hypothesis generation and validation across diverse tumor types. Future work will focus on expanding the repository to additional image collections and developing new methods for integrated multi-modal data analysis." +Thu,13:30,workshop,SpectriPy: Criss-Crossing R and Python for Powerful Mass Spectrometry Data Analysis Workflows,Johannes Rainer,,"Mass spectrometry (MS) is a key technology used across multiple fields, including biomedical research and life sciences. Technological advancements result in increasingly large and complex data sets and analyses must be tailored to the experimental and instrumental setups. Excellent software libraries for such data analysis are available in both R and Python, including R packages from the RforMassSpectrometry initiative and Python libraries like *matchms*, *spectrum_utils*, *Pyteomics* and *pyOpenMS*. 
Having partially complementary functionality, these software cover different aspects of MS-based proteomics or metabolomics data analysis. Integration of basic data types and functionality of the two programming languages are provided by the *reticulate* R package. The *SpectriPy* Bioconductor package builds upon *reticulate* and provides additional functionality to efficiently translate between R and Python MS data structures. It can convert between R’s `Spectra::Spectra` and Python’s `matchms.Spectrum` and `spectrum_utils.spectrum.MsmsSpectrum` objects hence enabling and simplifying the integration of R and Python for MS data analysis. In this workshop we present how reproducible cross-language MS data analyses can be conducted with the *SpectriPy* package. Such analyses will empower data analysts who can thus benefit from the full power of algorithms in both programming languages." +Thu,13:30,workshop,A reproducibility-driven framework for differential expression analysis of high-throughput omics data,"Tomi Suomi,Laura Elo",,"Reproducibility is a fundamental requirement for generating reliable and impactful scientific findings, particularly in the context of high-dimensional omics data such as transcriptomics and proteomics. The datasets are often complex, characterized by a large number of features measured in relatively small sample sizes, technical noise, and sampling variability. Ensuring that molecular signatures are reproducible across independent studies and experimental conditions is therefore critical. The reproducibility-optimized test statistic (ROTS) framework was developed to address these challenges by prioritizing features that demonstrate high reproducibility in differential expression analysis. The ROTS framework supports, but is not limited to, two-group differential expression analysis, multi-group analysis, linear models, mixed-effects models, and survival analysis. 
These allow the application of ROTS in a variety of settings, including longitudinal studies, time-to-event analyses, and many complex experimental designs that are common in clinical and systems biology research. The methodology has been implemented in an R package available through the Bioconductor project, attracting many users over the years, and resulting in multiple independent benchmark assessments. Here, we showcase the framework using both simulated datasets and real-world omics studies and include step-by-step examples that guide participants through the practical use of the framework in Bioconductor. We go from data input and experimental design to result visualization and interpretation. Using concrete real-world examples, attendees will learn how to apply ROTS to different types of omics data, tune key parameters, and assess reproducibility alongside conventional metrics." +Thu,13:30,workshop,msqrob2PTM: prioritizing differentially abundant and differentially used post translational modifications in mass spectrometry-based proteomics,"Lieven Clement,Nina Demeulemeester,Luca De Corso,Lucas Beerland",,"Mass spectrometry–based proteomics allows the identification and quantification of a myriad of posttranslational modifications (PTMs), which reveal additional complexity and diversity of the proteome. Indeed, PTMs greatly extend the number of different forms of a protein, i.e. proteoforms, that can be found. More importantly, these PTMs can impact protein functions as they often act as key switches in many cellular pathways that play vital roles in e.g. cell proliferation, metastasis and ageing. Novel multiple open-modification search engines that were developed in the proteomics community boost the identification of post-translational modifications (PTMs) with mass spectrometry (MS) based technologies. 
However, despite the advances in PTM identification, statistical methods for sensitive PTM-level quantification and differential analysis are lagging behind, and many researchers continue to rely on ad-hoc analysis workflows due to a lack of clear guidelines, which can lead to violations of key statistical assumptions. In this workshop, we will offer a hands-on introduction to PTM analysis in the msqrob2 package, which provides a set of rigorously validated and benchmarked statistical workflows for MS-based proteomics. We will begin by a data exploration to highlight that (1) peptides can contain multiple PTMs so we refer to a peptide with all its modifications as peptidoforms, (2) inferring on individual PTMs requires one to summarise all peptidoforms that carry a particular PTM into one PTM expression value, and show that it remains important to evaluate and visualize all peptidoforms that contribute to a PTM to rule out reporting differential PTMs that only stem from a subset of differential peptidoforms carrying another PTM; and that (3) parent proteins on which PTMs occur can also change in abundance regardless of their PTMs. Hence, any changes in the abundance of a PTM are then confounded with changes in protein abundance. We therefore introduce the concepts Differential Peptidoform/PTM Abundance (DPA) as well as Differential Peptidoform/PTM Usage (DPU) to enable a clear distinction between directly assessing DA of peptidoforms/PTMs (DPA), and differences in the relative usage of peptidoforms/PTMs corrected for changes in overall abundance of the corresponding protein (DPU). Next, we walk the participants through the minimal data processing steps required prior to statistical modelling, explaining when and why each step is necessary. Next, we introduce all steps required to perform DPA and DPU. For the DPU analyses, we will consider two different experimental designs. 
We illustrate the entire workflow when the peptidoform and protein-level abundance are acquired on the same sample (paired design). Thereafter we show how the analysis differs when peptidoform and protein-level abundances are quantified in independent samples (unpaired design). Finally, we will demonstrate how to translate biological questions into hypothesis tests and how to prioritise, report and visualise peptidoforms as well as PTMs that are differentially abundant / differentially used across treatment groups or upon stimulation. This workshop is designed for proteomics researchers who want to learn how to conduct differential PTM analyses using reproducible and statistically sound workflows, as well as for omics data analysts interested in expanding their skill set to include proteomics." +Wed,10:00,short talk,The tragic death of open source research software,Laurent Gatto,,"The Bioconductor project promotes high-quality, well documented, and interoperable software. The project offers detailed guidelines [1] to help achieve these objectives. These guidelines focus on important aspects of software development, documentation, testing, maintenance... as well as an often overlooked phase of the software life cycle, namely deprecating functionality [2] and sun-setting the software [3]. In this talk, I will reflect on the central role of software in research, and the personal, professional and financial constraints in which they are developed... and hence the importance of software maintenance, survival and, for most of them, death. References: [1] [Bioconductor Packages: Development, Maintenance, and Peer Review](https://contributions.bioconductor.org/index.html). [2] Chapter 25: [Deprecation Guidelines](https://contributions.bioconductor.org/deprecation.html) [3]. Chapter 26: [Package End of Life Policy](https://contributions.bioconductor.org/package-end-of-life-policy.html)." 
+Wed,10:10,short talk,Environmental sustainability for computational research: what can Bioconductor developers do?,Elizabeth Ing-Simmons,,"Environmental sustainability is becoming an increasing concern to research funders and institutions. The focus of research sustainability initiatives in the biosciences has largely been on the impact of wet lab research, e.g. through single-use plastic waste and energy-hungry ultra-low temperature freezers. However, computational research, particularly big data and AI/ML methods, is an increasing contributor to the environmental impact of research. Organisations like NetDRIVE and Green DiSC have been created to provide frameworks and resources to help computational researchers tackle these environmental impacts. The Green Software Foundation defines three principles for reducing the carbon emissions of software: energy efficiency, hardware efficiency, and choosing electricity sources that are less carbon-intensive. Of these, developers can typically address energy and hardware efficiency by optimising the CPU and memory usage of their software. However, efficiency considerations must be balanced with other aspects of software development, including code readability, maintainability, and limited development time. Computational biology software is often used by novices with limited training, putting additional pressure on the software developers to ensure their code is efficient. However, the developers themselves may also lack formal training and experience in identifying and tackling efficiency bottlenecks. In this presentation I will describe potential code efficiency issues that are relevant for R/Bioconductor packages, and why developers should care about them. I will present tools to profile R code, and some strategies that can be used to improve efficiency. 
The aim of this presentation is to start a discussion amongst Bioconductor developers about how to improve the efficiency of Bioconductor packages, and to identify priority areas for optimisation as part of my NetDRIVE fellowship." +Wed,10:20,short talk,Evolving Bioconductor training in Africa drawing on insights from 2025 and plans for 2026–2027,"Laurah Nyasita Ondari,Trushar Shah,Maria Doyle",,"High-throughput sequencing technologies are increasingly used across Africa in public health, agriculture, and biomedical research; however, access to structured, advanced bioinformatics training remains limited. Despite the availability of powerful open-source infrastructures such as Bioconductor and Galaxy, disparities in training opportunities, computational foundations, and locally available instructors continue to restrict the adoption of reproducible genomic data analysis workflows. This skills gap constrains research output, slows innovation, and limits the ability of African institutions to fully leverage rapidly growing genomic datasets. To address this need, Bioconductor applied for and was awarded a Chan Zuckerberg Initiative grant to develop and deliver coordinated in-person training, instructor development, and institutional partnerships across East and West Africa in 2025. Three intensive, hands-on workshops were delivered in Nairobi (Kenya), Addis Ababa (Ethiopia), and Abomey-Calavi (Benin), training a total of 83 participants (N = 83) selected from highly competitive applicant pools. Participants were drawn from universities, national research institutes, and biotechnology organisations working in genomics, public health, agriculture, and biomedical research. The curriculum covered R for data manipulation and visualisation, reproducible research practices, Bioconductor core data structures including SummarizedExperiment, and bulk RNA-seq workflows using DESeq2. 
Training combined structured instruction over four days with applied exercises using real datasets on the final day, dubbed “Bring-Your-Own-Data” (BYOD) day. The BYOD session was designed to strengthen analytical competence and workflow reproducibility by enabling participants to apply newly acquired skills directly to datasets generated through their own research or academic work. Post-course evaluations demonstrated substantial self-reported improvements in R proficiency (mean improvement score >4 on a 5-point Likert scale), high satisfaction ratings (>90% rating the course as “Very Good” or “Excellent”), and universal willingness to recommend the training to colleagues (100% of respondents). In Benin, French-language instructional support and translated materials were introduced, significantly improving accessibility in a multilingual research environment. Furthermore, six African researchers (n = 6) completed Carpentries instructor certification through the Carpentries Silver Membership, strengthening institutional capacity to sustain and deliver future training locally. Across workshops, participants expressed strong demand for advanced training in single-cell transcriptomics, population genomics and variant analysis, metagenomics, multi-omics integration, and computational workflow management. These findings indicate a clear transition from introductory capacity building toward advanced, specialised bioinformatics needs. Responding to this demand, future efforts will focus on delivering advanced Bioconductor workshops in Africa, supported by webinars, seminar series, and hybrid delivery models to expand participation and ensure sustained capacity building. We welcome collaboration as we expand this next phase of Bioconductor training across Africa." 
+Wed,11:00,short talk,Benchmark of single-cell batch correction methods available in the R and Python languages.,"Elena Zuin,Chiara Romualdi,Davide Risso,Gabriele Sales",,"Multi-sample studies in single-cell RNA-sequencing introduce technical batch effects from operators, laboratories, reagents, platforms, and timing, which confound downstream analyses. Current correction methods, based on different mathematical approaches, frequently yield inconsistent results. Existing benchmarks are limited by incomplete scenario coverage and reliance on disparate datasets, hindering reliable comparison of these methods. To overcome these limitations, we conducted a benchmark study using 130 real datasets. We projected these datasets into a shared two-dimensional space and evaluated the effect of batch correction with multiple quantitative metrics. We found that no single metric adequately captures both effective batch removal and preservation of biological signals. Therefore, we propose a more informative combination: the Wasserstein distance for assessing batch removal and Adjusted Rand Index (ARI) for evaluating biological signal preservation, combined via their geometric mean. We identified regions of relative strength and weakness of different batch correction algorithms by means of a Support Vector Machine (SVM) classifier, enabling prediction of the optimal method for a given dataset based on its structural characteristics. Finally, we developed BatChef, an R package that streamlines batch effects correction workflows and assists users in identifying and applying the optimal method for their data." +Wed,11:10,short talk,Coralysis: sensitive integration of single-cell data,"António G.G. Sousa,Johannes Smolander,Sini Junttila,Laura Elo",,"State-of-the-art integration methods for single-cell data often struggle with imbalanced cell types across heterogeneous datasets, especially when similar but unshared cell types are present. 
Here, we introduce Coralysis, an R/Bioconductor package with a multi-level integration algorithm designed to overcome these challenges. Coralysis enables sensitive integration, reference mapping, and cell state identification across single-cell datasets, demonstrating robust performance in diverse single-cell RNA-seq integration tasks. It outperforms existing methods when similar cell types are unevenly distributed across batches or entirely absent from some datasets. Beyond single-cell transcriptomics, Coralysis integrates rare cell populations from single-cell proteomic assays, such as basophils (0.5%) from whole blood. Additionally, it improves cell type classification across various query-reference scenarios. For example, it correctly reassigns CD16+ monocytes and natural killer cells that were previously misclassified as CD14+ monocytes and cytotoxic T cells in peripheral blood mononuclear cells. Notably, Coralysis provides probability scores that help identify both transient and stable cell states, along with their associated differential expression programs. Overall, Coralysis enhances the study of subtle biological variations and their dynamics by improving the integration of imbalanced cell types and states, offering a more faithful representation of the cellular landscape in complex single-cell experiments. Coralysis is available as an R/Bioconductor package at: https://bioconductor.org/packages/release/bioc/html/Coralysis.html." +Wed,11:20,short talk,Annotating splicing events with splicelogic,"Beatriz Campillo,Michael I Love",,"Cells alter the RNA isoforms they produce through changes in transcription initiation, elongation, and termination and RNA processing by splicing factors in response to different conditions or over time. Differential transcript usage (DTU) can be detected from RNA sequencing, and new long read technologies capable of sequencing full transcripts have greatly improved the precision of DTU analyses. 
However, the diversity of DTU underlying events makes it challenging to summarize in terms of functional consequences on RNA regulation and translation. `splicelogic` is an R package that adds biological interpretability to DTU analyses by transforming transcript-level summary statistics (from any DTU method) and annotated exon structures (from GTF or TxDb), into annotated splice events such as exon skipping, alternative splice sites, intron retention, and alternative UTRs. Splice events and their consequences are represented as standard `Bioconductor` objects, enabling seamless integration with existing genomic workflows. Annotations are provided as metadata columns on the transcripts detected as participating in DTU. Event detection and classification are implemented using efficient, fully vectorized `GRanges` / `plyranges` operations, allowing scalable and reproducible analyses without per-event looping. The `splicelogic` functions are **modular, readable, and extensible**, making it easy for users to adapt core logic to custom scientific questions or novel splicing definitions. By translating DTU changes into interpretable splice events, `splicelogic` enables downstream analyses of **functional consequences**, including protein sequence changes and integration with regulation, structure, or function-based tools. We actively seek feedback and collaboration from the `Bioconductor` community, particularly from users to test `splicelogic` on their own datasets and help shape its usability and downstream integration into long-read and short-read RNA-seq workflows. `splicelogic` will be submitted for the `Bioconductor` April 2026 release." +Wed,11:30,short talk,Transcript Grouping: Utilizing tidyomics pipeline with plyxp to summarize expression informed by transcript similarity,"Justin T. Landis,Michael I Love",,"Traditional transcript analysis, such as differential expression or differential splicing, is typically resolved at either the gene or transcript level. 
Using the `plyxp` package, we provide a framework to summarize isoforms within genes, enabling an alternative analysis that falls between these levels. The `plyxp` package, part of the tidyomics project, enables common `dplyr` operations on SummarizedExperiment objects, including mutation, filtering, grouping, and summarization. `plyxp`'s flexible framework provides the backend for numerous tidySummarizedExperiment operations and has potential for expansion into the treeSummarizedExperiment class. We demonstrate an RNA-seq analysis pipeline where genomic features are aggregated according to hierarchical clustering of transcripts based on exon structure." +Wed,11:40,short talk,Clustering Gene Expressions Based on Spatial Latent Traits,"Alex Cecchetto,Francesco Denti,Davide Risso",,"Spatial transcriptomics enables gene expression analysis in intact tissue. This calls for the development of accurate yet scalable models that incorporate spatial structure while remaining interpretable. To bridge this gap, we present a Bayesian framework for spatial transcriptomics that captures gene-specific spatial variation via a low-rank tensor product B-spline basis. Each observation is modeled using a Poisson distribution with a log-linear rate: \begin{equation} Y_{ij} \mid \eta_{ij} \sim \mathrm{Poisson}\!\left( \exp(\eta_{ij}) \right), \end{equation} where the linear predictor is defined as \begin{equation} \eta_{ij} = U_i^\top V_j + X_i^\top B_j + G_i^\top Z_j. \end{equation} where $U_i^\top V_j$ captures latent factor structure, $X_i^\top B_j$ represent gene-specific spatial effects through spline basis evaluations at location $s_i$, and $G_i^\top Z_j$ models interactions between gene-level covariates and cell-specific weights. The spline representation induces smooth spatial expression surfaces with strong regularization and computational efficiency, providing a practical alternative to pixel-wise spatial effects and computationally intensive Gaussian process formulations. 
To discover shared expression patterns across genes, the model uses probabilistic clustering. Cluster weights are modeled with a truncated stick-breaking prior, allowing a flexible mixture while remaining compatible with scalable inference. Recent theoretical developments have shown that, in high-dimensional settings, Bayesian clustering can collapse into trivial or unstable solutions. Clustering is therefore performed in the lower-dimensional space of spline-basis coefficients, so groups are determined by shared smoothed spatial signatures rather than noisy spot-level measurements. Posterior inference is performed efficiently via stochastic variational inference (SVI). The method uses minibatches and stochastic gradients to scale to large datasets. The model is implemented in Python using NumPyro, a probabilistic programming library built on JAX. This setup leverages automatic differentiation, just-in-time compilation, and GPU acceleration. As a result, model fitting becomes practical for high-dimensional spatial transcriptomics data." +Wed,11:50,short talk,rhinotypeR enables reproducible rhinovirus genotype assignment from VP4/2 sequences,"Martha M. Luka,Ruth Nanjala,Wafaa M. Rashed,Winfred Gatua,Olaitan I. Awe",,"Rhinoviruses (RVs) are among the most prevalent human respiratory pathogens, yet their molecular characterization remains fragmented across analytical tools and inconsistent between studies. Current genotype assignment typically relies on sequence alignment, pairwise distance calculation, and prototype comparison. This fragmentation hinders reproducibility and scalability. Here, we present rhinotypeR, an open-source R package that provides a scriptable and transparent workflow for RV genotyping based on the VP4/2 genomic region. 
The package integrates multiple analytical steps; alignment, distance calculation, genotype assignment, and visualization within the Bioconductor ecosystem and applies standardized species-specific thresholds (10.5% for HRV-A/C and 9.5% for HRV-B). Using a validation dataset encompassing over 90% of known RV types, rhinotypeR reproduced pairwise genetic distances obtained with ape and MEGA X with Mantel correlation (r = 1.000, p = 0.001) and negligible numerical deviation (<10⁻¹⁰). Approximately 80% of sequences showed complete agreement with previous genotype assignments by multiple analysts, and most remaining discrepancies occurred near the classification thresholds. Ct value distributions were broadly similar across matched, mismatched, and unassigned sequences, indicating that discrepancies were unlikely to be driven by viral load. By consolidating fragmented analytical steps into a reproducible and automated framework, rhinotypeR improves consistency in rhinovirus genotyping and supports scalable, transparent molecular surveillance. The package is freely available through Bioconductor for research and routine public health applications." +Wed,12:00,short talk,fourSynergy: A framework for ensemble-based analysis of 4C-seq data,Sophie-Marie Wind,,"**fourSynergy: A framework for ensemble-based analysis of 4C-seq data** Sophie-Marie Wind1, Lucas Plagwitz1, Dominik Heider1, Carolin Walter1 1 Institute of Medical Informatics, University of Münster, 48147 Münster, Germany The spatial organization of chromatin is crucial for gene regulation. Alterations can lead to various diseases, for instance cancer. Since changes in chromatin architecture are potentially reversible, they represent a promising target for developing new therapies. Circular Chromosome Conformation Capture Sequencing (4C-seq) is a sequencing technique that provides valuable insights into the interplay between genes and regulatory elements. 
A benchmarking of available 4C-seq tools has revealed that none of them perform adequately across all main use cases. Thus, the goal of this work is to develop an ensemble algorithm that leverages synergies among existing 4C-seq algorithms, thereby achieving superior predictive performance. We collected 20 curated 4C-seq datasets with validated interactions by comprehensive literature research for our analyses. We employed a weighted voting approach to combine the results from individual tools and optimized the weights according to various predictive metrics using different optimization strategies implemented in the Nevergrad package. We demonstrated the benefits of ensemble interaction calling across diverse 4C-seq datasets and our results show that our ensemble algorithm significantly outperforms individual algorithms across various datasets and different performance evaluation metrics. For instance, the base algorithms peakC, r3C-seq, fourSig, and 4C-ker, widely used methods for 4C-seq analysis, achieved a median Area Under the Precision-Recall Curve (AUPRC) of 0.122, 0.084, 0.016, and 0.016, respectively. In contrast, our ensemble approach reached a median AUPRC of 0.34 in cross-validation. Similarly, the ensemble algorithm outperformed the other tools in terms of the F1 score, with a median F1 score of 0.262, compared to 0.193 for peakC, 0.158 for r3C-seq, 0.039 for fourSig, and 0.034 for 4C-ker, respectively. To make our weighted voting-based ensemble algorithm accessible to users, we developed a framework called fourSynergy. This framework consists of three main components: a Snakemake pipeline, an R Bioconductor package, and a Shiny app. The pipeline performs interaction calling using the tools integrated into our ensemble algorithm. The R Bioconductor package processes the results of the tools used and allows the user to apply the ensemble algorithm in various modes. Additionally, it provides a range of analyses and visualizations. 
The Shiny app runs on top of the Bioconductor package and enables intuitive access to our ensemble calling for all users, facilitating a user-friendly and interactive analysis of the results. Our framework fourSynergy not only offers a comprehensive 4C-seq analysis tool but also demonstrates the integration of Bioconductor packages within larger frameworks. A common challenge in bioinformatic analysis workflows is the seamless integration of diverse tools, often written in different programming languages and with conflicting version dependencies. This can prevent the development of robust workflows and highlights the need for effective integration strategies. Our framework provides a real-world example of how Bioconductor packages can be integrated with other bioinformatics tools, sparking exciting discussions within the Bioconductor community about potential integration strategies. This exchange can lead to innovative solutions and opportunities for collaboration, and help to further bring the Bioconductor community together." +Thu,9:30,short talk,Orchestrating Microbiome Analysis with Bioconductor,"Tuomas Borman,Leo M Lahti",,"Computational methods are essential tools for modern microbiome research. Yet, challenges such as lack of standardization, reproducibility, and transparency often limit reliable analysis and interpretation. Bioconductor addresses these challenges through a global, community-driven network that provides a robust, open-source ecosystem of high-quality tools. The presentation highlights how collaborative development supports microbiome research. In this domain, a growing ecosystem of packages builds on shared Bioconductor data structures such as TreeSummarizedExperiment, which extends the widely adopted SummarizedExperiment class. By relying on interoperable classes and consistent design principles, tools developed by different groups can work seamlessly together. 
For users, this reduces time spent on data wrangling, broadens access to well-established statistical methods, and facilitates the application and benchmarking of approaches developed across different fields. The collaborative nature of the ecosystem ensures that methods and workflows continue to advance through contributions and shared expertise from a diverse community of developers and users. This common foundation enables robust and evidence-based analysis workflows while supporting methodological innovation. It lowers barriers for collaboration between package authors, encourages method reuse, and accelerates cross-disciplinary exchange. As microbiome research progresses toward multi-omics integration, longitudinal studies, and other increasingly complex study designs, interoperable infrastructure supported by an engaged community becomes increasingly critical for scalable and reproducible analyses." +Thu,9:40,short talk,HoloFoodR: a statistical programming framework for holo-omics data integration workflows,"Artur Sannikov,Tuomas Borman,Robert D. Finn,Morten Tønsberg Limborg,Alexander B. Rogers,Varsha Kale,Kati Hanhineva,Leo M Lahti",Artur Sannikov,"Holo-omics is an emerging research area that integrates multi-omic datasets from the host organism and its microbiome to study their interactions. Recently, curated and openly accessible holo-omic databases have been developed. The HoloFood database, for instance, provides nearly 10 000 holo-omic profiles for salmon and chicken under controlled treatments. However, bridging the gap between holo-omic data resources and algorithmic frameworks remains a challenge. To address it, we introduce HoloFoodR, an R/Bioconductor package, which streamlines access to the HoloFood database. The retrieved data is available in MultiAssayExperiment and TreeSummarizedExperiment formats, enabling simple access to downstream analyses using thousands of packages from the R/Bioconductor ecosystem. 
Combining the latest advances in statistical programming with curated holo-omic data sets can facilitate the design of open and reproducible research workflows in the emerging field of holo-omics." +Thu,9:50,short talk,"Declarative, YAML-Based workflows for reproducible and scalable microbiome analysis in the mia ecosystem","Dattatray Mongad,Tuomas Borman,Nitin Bayal,Leo M Lahti",,"Background: TreeSummarizedExperiment objects are widely used in microbiome studies, and the mia ecosystem provides functions for tasks such as transformation, diversity profiling, ordination, and association testing. In practice, these analyses are often implemented as scripts or notebooks that evolve over time. As workflows are revised and extended, researchers frequently regenerate selected outputs and explore alternative choices, while intermediate results are scattered across files and folders. This can make it difficult to reproduce prior outputs, compare analysis variants, and share complete analysis settings with collaborators. Methods: We propose a configuration-first workflow template in which users specify an analysis plan in a single YAML file using predefined step types and explicit dependencies. The YAML specification is compiled into a targets-compatible pipeline that records step parameters, manages intermediate outputs, and supports structured provenance. To reduce unnecessary recomputation, the workflow can use cached outputs for downstream analyses. We additionally explore a minimal form-based interface to assist with workflow creation without direct code editing. Results: The template supports common microbiome analyses as modular steps while enforcing a consistent output structure and provenance. The workflow minimizes redundant computation and enables parallel execution of independent steps, thereby improving scalability and performance in high-performance computing environments. 
Centralizing step definitions in YAML improves traceability of analysis decisions, enables systematic comparison of analysis variants, and simplifies sharing of complete workflow specifications alongside results. Conclusion: A YAML-based workflow description that integrates mia functions with targets can improve consistency, traceability, and computational efficiency in routine microbiome analyses. Future work will expand the step catalog, strengthen configuration validation, and further develop interactive workflow authoring and shared asset management." +Thu,10:00,short talk,BDPE: A Bayesian hierarchical model for differential prevalence analysis with applications in microbiome studies,"Juho Pelto,Kari Auranen,Janne V. Kujala,Leo M Lahti",,"Recent evidence suggests that analyzing the presence/absence of microbial taxa can offer a compelling alternative to differential abundance analysis in microbiome studies. However, standard frequentist approaches face challenges with boundary cases and multiple testing. To address these challenges, we developed the Bayesian Differential Prevalence Estimator (BDPE), a method based on Bayesian hierarchical modeling. The core idea of the new method is that differential prevalence estimates are assumed to arise from a common asymmetric Laplace distribution whose variance and skewness are informed by all taxa collectively. We benchmarked our method against existing differential prevalence and abundance methods using data from 67 human gut microbiome studies. We observed considerable variation in performance across methods, with BDPE outperforming alternatives by combining high sensitivity with effective error control. Notably, our method also demonstrated superior replication of findings across independent studies. Furthermore, BDPE provides differential prevalence estimates and uncertainty intervals that are inherently adjusted for multiple testing. 
While the method is motivated by and validated in microbiome data, it could be potentially applicable to other types of high-throughput sequencing data. We are preparing a Bioconductor implementation, supporting the TreeSummarizedExperiment data container and the mia ecosystem for microbiome analysis." +Thu,10:10,short talk,LimROTS: a hybrid method integrating empirical Bayes and reproducibility-optimized statistics for robust differential expression analysis,"Ali Mostafa Anwar,Akewak Jeba,Leo M Lahti,Eleanor Coffey",,"Differential expression analysis plays a critical role in omics research by enabling the precise identification of features associated with distinct phenotypes, such as disease and healthy states. Numerous statistical methods have been developed for this purpose, ranging from classical t-tests to advanced approaches such as limma, ROTS, DEqMS, and edgeR. Despite these advances, methods that combine flexibility with reproducibility-optimized statistics for clinical omics data remain limited. Here, we present LimROTS, a hybrid approach that integrates linear regression and empirical Bayes moderation with reproducibility-optimized statistics to produce a novel moderated ranking statistic for robust differential expression analysis across diverse omics data types. The performance of LimROTS was evaluated using twenty-one gold-standard proteomics spike-in datasets covering a wide range of experimental designs and analytical platforms, as well as additional metabolomics and transcriptomics datasets. Our results demonstrate improved accuracy and reproducibility in complex omics settings. 
LimROTS is available as an R/Bioconductor package (https://doi.org/10.18129/B9.bioc.LimROTS), with all analysis code accessible at https://github.com/AliYoussef96/LimROTSmanuscript" +Thu,10:20,short talk,Benchmarking neighborhood-aware 'DELocal' and introducing 'broadSeq' for differential expression method selection,"Rishi Das Roy,Outi K Hallikas,Jukka Jernvall",,"Despite a mature ecosystem for bulk RNA‑seq, bioinformaticians and computational biologists still face fractured tool-chains and inconsistent data structures that hinder method benchmarking, reproducibility, and integration. We introduce **`broadSeq`**, a Bioconductor package designed to standardize and automate comparative RNA‑seq analyses. `broadSeq` exposes multiple differential expression (DE) methods—**`DELocal`**, `DESeq2`, `limma‑voom`, `edgeR`, `EBSeq`, and others—through consistent R APIs that accept a single, well‑defined gene expression input `SummarizedExperiment` and return primitive R data.frame or list objects, enabling straightforward composition with downstream tools. The package provides end‑to‑end workflow support: normalization, sample QC via clustering, differential expression, and `ggpubr`‑compatible plots with publication‑ready defaults. Benchmarking utilities allow side‑by‑side evaluation across contrasts with reproducible configurations. We use **`broadSeq`** to systematically benchmark DE methods with **`DELocal`**, a neighborhood‑aware DE approach that incorporates local genomic context (expression of nearby genes). Across bulk RNA‑seq datasets comprising wild‑type mouse organs (heart, kidney, liver, brain, forebrain, hindbrain) spanning embryonic development, and human cancer datasets, we evaluated contrasts including organ‑specific upregulation (one organ vs others) and mid‑ vs early developmental stages. `DELocal` consistently identified distinct, biologically coherent gene sets, with ontology‑based evaluations (GO and disease term enrichment) supporting functional relevance. 
Importantly, `DELocal` tends to prioritize highly expressed genes that conventional fold‑change‑centric methods may miss; by computing a relative log fold change with respect to neighboring genes, it surfaces high‑abundance, functionally pertinent signals even when absolute fold changes are modest. For reproducibility and maintainability, `broadSeq` emphasizes predictable I/O types, vignettes and executable examples, and `ggpubr`‑based visualization. Together, two bioconductor packages **`DELocal`** and **`broadSeq`** extend differential expression discovery and make comparative analyses practical: bioinformaticians and computational biologists can benchmark, select, and integrate the most suitable method for their data without GUI lock‑in, improving workflow robustness and scientific rigor." +Thu,11:30,short talk,Geospatial variation of gut microbiome composition in the FINRISK cohort,"João Paulo Cassucci dos Santos,Leo M Lahti,Himmi Lindgren,Matti Ruuskanen,Aki Havulinna",,"It is known that the human gut microbiome (HGM) has large-scale geographic variation across populations, but the underlying factors for why it has this strong geographical component is still poorly understood and requires further investigation. This work evaluates how enterosignatures, measurements of gut microbiome composition, is distributed in a fine geographic scale across Finland, and we intend to see if patterns arise out of the data modelled spatially. We utilized data from the FINRISK 2002 population survey, which contains metagenomic sequences as well as phenotypical characteristics across six different regions from Finland. Data processing was done with BioConductor microbiome framework based on TreeSummarizedExperiment data container and the mia package. The geographic variation was studied using R-INLA, with Bayesian conditional auto-regressive models. Data were aggregated into a 10x10 km2 regular grid across Finland. 
We decided to agglomerate the biological data into enterosignatures at the species-level instead of the most common genus-level agglomeration to explore if a higher taxonomic resolution would exhibit new geographical patterns. Most of the enterosignatures had a zero-inflated distribution which was best modelled by a hurdle method. This method models both presence/absence of the enterosignature and for present observations the relative abundance. We also found significantly different geographic patterns for enterosignatures associated with two closely related species that are often grouped together in genus-agglomeration (Prevotella copri and Prevotella rara). The work contributes new geospatial methods that are interoperable with the Bioconductor framework." +Thu,11:40,short talk,Microbiome-based risk prediction in prospective cohort studies,"Ville Laitinen,Leo M Lahti,Oliver Aasmets",,"Prospective cohort studies linking microbiome data with long-term health outcomes enable identification of microbial signatures that could function as biomarkers of disease risk years before clinical onset. However, several properties of microbiome data complicate standard statistical approaches, requiring customized analytical strategies. Here, we discuss the emerging statistical and machine learning approaches for microbiome-based risk prediction in time-to-event analyses. We discuss data preprocessing strategies, microbiome feature choices ranging from individual taxa to community-level metrics, and modeling approaches ranging from Cox regression to tree-based methods and deep learning. Using the FINRISK cohort of Finnish adults with nearly 20 years of follow-up, we illustrate how data transformations, sample size, follow-up duration, and model choice influence results, revealing temporal heterogeneity in microbial associations and non-linear effects overlooked by simpler models. 
In addition to practical recommendations for prospective microbiome analyses, we assess availability and gaps of appropriate methods in the Bioconductor ecosystem" +Thu,11:50,short talk,Multi-omics Survival Modeling for Incident Disease Risk Prediction in Cohort Studies,"Sneha Das,Nalin Arora,Geraldson T. Muluh,Tuomas Borman,Aki Havulinna,Leo M Lahti,Himel Mallick",,"Complex diseases are influenced by diverse biological factors. Integrating data across heterogeneous sources, such as multiple omics layers, has been shown to improve the prediction of future health status and incident disease risk. With the growing availability of well-phenotyped longitudinal cohorts, researchers are increasingly adopting survival analysis and time-to-event frameworks to model disease onset and subsequent health trajectories using high-dimensional multi-omics profiles. However, most existing omics-based risk models rely on a single omics layer, limiting both predictive performance and biological insight. Moreover, current multi-omics integration approaches often depend on simple data concatenation or other early-integration strategies that fail to adequately account for heterogeneity across distinct omics modalities. Consequently, the development of principled methods for multi-omics-based risk prediction remains an active area of research, including in population-scale studies of the human microbiome. IntegratedLearner is a previously published framework for multi-omics prediction and classification and is available as an R package on GitHub, with plans to submit the package to Bioconductor in near future. Here, we extend IntegratedLearner to accommodate time-to-event outcomes, enabling principled and efficient integration of multi-omics data for incident disease risk prediction. 
As a case study, we demonstrate its application to heterogeneous microbiome-derived data layers, including taxonomic composition, functional potential, and metabolomic profiles, allowing information to be shared across omics layers to yield consolidated risk predictions. This unified framework is intended to improve disease risk prediction relative to survival models based on a single omics layer or naive data concatenation across multiple omics layers. We are implementing the proposed methods within a Bioconductor-compatible package, supporting standardized multi-omics data input through the (Tree)SummarizedExperiment and MultiAssayExperiment data structures. This implementation facilitates reproducible and scalable survival analyses in microbiome cohorts and, more broadly, across multi-omics studies." +Thu,12:00,short talk,transmogR: Using genomic variants to modify a reference transcriptome for analysis of an Australian Indigenous population,"Stevie M Pederson,Yassine Souilmi,Alex Brown,Jimmy Breen",,"Incorporation of cohort-consensus variants brings the reference genome closer to that of the participants, providing technically improved quantification, with strong technical and ethical importance for genetically distinct populations. The incorporation of a set of variants into a reference genome is well established using the STARconsensus method[1]. The Bioconductor package transmogR enables the direct creation of variant-modified reference transcriptomes as well as genomes, additionally providing any required genomic co-ordinates post-modification. Modified transcriptomes can then be used by standard transcript-level aligners, such as salmon[2]. Standard output from salmon, including bootstraps, can be rapidly parsed by transmogR, with the capacity for distinct references across a cohort study, opening the possibility for personalised reference transcriptomes, or even simple chrY-excluded references for relevant individuals within a cohort study. 
In the Australian Aboriginal population, 25% of genomic variants are unique and not seen in existing variant databases[3]. Two transmogR-modified reference transcriptomes were used, during analysis of a pilot study, with one reference excluding chrY-based transcripts for female participants. Whilst differences in quantification were minor for gene-level analysis, noticeable differences were seen for both bootstrap estimates and transcript-level quantification. Using the methods of Baldoni et al[4], the changes in both of these key estimates combined to noticeably impact significance for a non-trivial number of transcripts, whilst the majority of results showed strong consistency with an un-modified reference. This presents an exciting opportunity for identifying clinically relevant mechanisms when disease prevalence appears highly associated with a genetically distinct population. 1. Kaminow, B., Ballouz, S., Gillis, J., & Dobin, A. (2022). Pan-human consensus genome significantly improves the accuracy of RNA-seq analyses. Genome Research, 32(4), 738–749. 2. Srivastava, A., Malik, L., Sarkar, H., Zakeri, M., Almodaresi, F., Soneson, C., Love, M. I., Kingsford, C., & Patro, R. (2020). Alignment and mapping methodology influence transcript abundance estimation. Genome Biology, 21(1), 239. 3. Silcocks, M., Farlow, A., Hermes, A., Tsambos, G., Patel, H. R., Huebner, S., Baynam, G., Jenkins, M. R., Vukcevic, D., Easteal, S., & Leslie, S. (2023). Indigenous Australian genomes show deep structure and rich novel variation. Nature, 1–9. 4. Baldoni, P. L., Chen, Y., Hediyeh-Zadeh, S., Liao, Y., Dong, X., Ritchie, M. E., Shi, W., & Smyth, G. K. (2024). Dividing out quantification uncertainty allows efficient assessment of differential transcript expression with edgeR. Nucleic Acids Research, 52(3), e13." 
+Wed,03:15:00 PM,short talk,GraphExperiment: infrastructure for network representation of high-dimensional data in R/Bioconductor,Fabrício Almeida-Silva,,"Networks have become a widely used data representation in high-throughput genomics, describing features (e.g., genes, proteins, metabolites) and their interactions. Common types of biological networks inferred from quantitative data include gene coexpression, gene regulatory, and co-abundance networks. While data structures to represent graphs (e.g., `igraph`) and quantitative data (e.g., `SummarizedExperiment` and extensions) exist in R, integration between both is currently lacking. Here, we present `GraphExperiment`, an S4 class that extends `SingleCellExperiment` to include a slot for `igraph` objects. `GraphExperiment` allows the simultaneous manipulation of quantitative data and graphs, providing users and developers with a powerful infrastructure that can enhance interoperability between Bioconductor packages. We demonstrate the effectiveness of `GraphExperiment` in a major refactoring of the `BioNERO` package for biological network inference and analysis." +Wed,03:25:00 PM,short talk,"ariadne: an R package to retrieve, wrangle and integrate relational information from knowledge databases","Giulio Benedetti,Thomaz F.S. Bastiaanssen,Eugenia E. Natasha,Tuomas Borman,Leo M Lahti",,"With the continuous advancement of high-throughput techniques in the biological sciences, large multi-modal datasets are becoming increasingly prevalent. In tandem, the research community has developed extensive and interconnected knowledge databases capturing known relations within and across data modalities. However, such relational information between data modalities remains unused unless it is organised into a searchable format and made publicly accessible through online databases, thereby transforming untidy data into new usable knowledge. 
Although this knowledge is widely available across numerous public databases, it is often difficult to access and even more challenging to integrate into a typical omic data analysis workflow. To this end, we developed ariadne, an R package providing tools to retrieve, wrangle and integrate knowledge from both public (e.g., ChocoPhlAn, Web of Life and UniProt) and user-defined databases, linking microbial features not only to their genetic makeup but also to the metabolites they produce, their biological functions, and ultimately their impact on host’s health and the environment. Our package relies on the novel MultiFactor class, designed for optimised storage and efficient translation of relational information. When possible, the SPARQL semantic query language is used to retrieve knowledge by flexible and rapid queries to online databases. This framework interoperates with and enhances the SummarizedExperiment-based ecosystem by integrating knowledge into analytical routines such as module-wise and microbe set enrichment analyses — an area that has, until now, lacked easy access to such valuable resources." +Wed,03:35:00 PM,short talk,Comprehensive LC-MS metabolomics data processing with notame R/Bioconductor package,"Ville Koistinen,Retu Haikonen,Atte Lihtamo,Vilhelm Suksi,Olli Kärkkäinen,Leo M Lahti,Kati Hanhineva",,"Liquid chromatography–mass spectrometry (LC-MS) is widely used in metabolomics. Raw LC-MS data is relatively complex, consisting of molecular features originating not only from unique metabolites but also from redundant adducts, in-source fragments, artefacts, and impurities. It is also prone to signal intensity drift during long sequences, missing values, and false positives in statistical tests. 
To tackle these instrument-related and data-dependent challenges with robust pre-processing and quality evaluation tools, we describe here $\texttt{notame}$ R package bundle, which recently became available as a R/Bioconductor release and now supports $\texttt{SummarizedExperiment}$. It pre-processes LC-MS metabolomics data by correcting signal intensity drift, flagging potential low-quality and contaminant features, imputing missing values, and clustering features likely originating from the same metabolite. Most of the functions include default recommended values that can be modified by the user. Univariate and multivariate statistics with parametric and non-parametric alternatives and false discovery rate can be performed with $\texttt{notameStats}$ package. Results and data visualisations, such as quality control figures, PCA, heatmaps, volcano plots, and feature-wise graphs, are available in $\texttt{notameViz}$ package. Together, these packages contribute to a complete metabolomics data analysis workflow, connecting the phases between signal detection/alignment and metabolite identification while producing publication-ready illustrations." +Fri,9:30,short talk,Bioconductor software integration into the ELIXIR Research Software Ecosystem through metadata standardization,"Claire Rioualen,Vincent James Carey,Sean Davis,Maria Doyle,Sebastian Lobentanzer,Hervé Ménager",,"Background The Bioconductor community provides an open collection of more than 2,000 software packages for users worldwide. They are broadly used and downloaded from its repository, and are also available on many other platforms and through a variety of services, such as Bioconda, Galaxy, nf-core pipelines, and WorkflowHub, among others. The visibility of those carefully documented tools could be further expanded by implementing additional metadata conventions. 
The ELIXIR Research Software Ecosystem (RSEc) aims to enhance software discoverability across projects and platforms by extracting and standardizing package and software metadata, and centralizing those through the ELIXIR bio.tools repository. The bio.tools repository, which currently contains more than 30,000 curated entries, aims to improve visibility and findability, benefiting both users and developers. It also offers centralized software metadata curation efforts and cross-linking of software using bio.tools identifiers and associated metadata. Objectives This project aims to further improve Bioconductor package annotations and FAIRness by synchronizing their metadata within the RSEc. This is achieved by leveraging existing, extensively used standards for schema markup, ontologies, and semantic specifications. By increasing metadata interoperability, those shared principles allow to further integrate resources, improve their visibility, and reach yet a wider range of users worldwide. By implementing those shared conventions, this project also contributes to consolidating interactions between the Bioconductor community and the ELIXIR infrastructure, two major international organisations promoting good practices and open source resources in the domain of bioinformatics. Results This collaboration allowed to import Bioconductor software package metadata into the ELIXIR RSEc. About 1,500 existing Bioconductor software descriptions were updated, and another 700 package entries were newly created. By using the ELIXIR bio.tools registry as a hub for metadata uniformization, the RSEc contributes to assisting curation efforts and enhancing software visibility across platforms. We are also using AI agents and developing a Model Context Protocol server that can facilitate metadata suggestions or aid software metadata curators with decision support backed by standard ontologies such as the EDAM ontology. 
Perspectives This project aims to foster mutual interactions between package developers, curators and users. Consequently, our next steps include working on guidelines to promote these standards, and facilitate their use in close collaboration with package developers." +Fri,9:40,short talk,CudaMon: Monitor GPU and CPU resource consumption within R,"Mohammad Amin Zadenoori,Davide Risso,Gabriele Sales",,"CudaMon is an R package that provides integrated, high-resolution system and GPU performance monitoring for computational workflows leveraging GPU acceleration. By extending the capabilities of Rcollectl, CudaMon offers real-time tracking of CPU, memory, and disk I/O alongside GPU utilization, temperature, power consumption, and memory usage through two backends: a basic Collectl interface and the more detailed NVIDIA Management Library (NVML). Designed for profiling and optimizing GPU-accelerated R computations with NVIDIA hardware, the tool allows users to start and stop monitoring programmatically, insert timestamps at key workflow phases, and generate unified visualizations of CPU/GPU activity. The importance of CudaMon lies in its ability to address a critical gap in the R ecosystem: the lack of lightweight, unified profiling tools for heterogeneous (CPU+GPU) computing environments. As R increasingly interfaces with GPU-accelerated libraries, understanding resource bottlenecks—such as GPU underutilization, memory constraints, or CPU–GPU data transfer inefficiencies—becomes essential for performance tuning and scalability. By providing continuous, background monitoring with minimal overhead, CudaMon enables researchers and developers to correlate computational stages with system metrics, optimize code, validate resource allocation, and ensure efficient use of expensive hardware. 
We will showcase CudaMon usage by benchmarking the performance of different BLAS implementations that rely on CPU and/or GPU, such as openBLAS, cuBLAS, and NVBLAS, for different matrix operations." +,,poster,msqrob2book: Orchestrating Mass Spectrometry Based Proteomics with msqrob2,"Christophe Vanderaa,Stijn Vandenbulcke,Lieven Clement",,"Mass spectrometry (MS) is the method of choice when exploring the proteome landscape in biological systems. The sensitivity and the versatility of the technology have led to the development of diverse protocols and acquisition strategies, but their data analysis and interpretation is hindered by complex technicalities, requiring expert knowledge when developing workflows for answering the research question at hand. We therefore developed the msqrob framework that provides a suite of flexible and standardised statistical workflows aimed at reducing the gap between data generation and data interpretation, and is implemented in the R/Bioconductor package, msqrob2. To celebrate the tenth anniversary of our msqrob framework [1], we developed the ""msqrob2book"": a set of thoroughly documented and hands-on vignettes for the statistical analysis of MS-based data [2]. It includes the latest improvements of the software, showcased on a wide panel of use cases. The first chapters progressively introduce important statistical concepts from a basic workflow up to the most advanced models. The following chapters demonstrate the application of msqrob2 for assessing different biological questions using data from different experimental designs, acquisition strategies, instruments, and search engines. The book is designed to help users tailor their statistical analysis workflow to their specific datasets and research questions. The book also represents a welcoming entry point for either proteomics practitioners or data analysts/bioinformaticians interested in learning how to analyse MS-based proteomics data. 
[1] Goeminne LJE, Gevaert K, Clement L. (2016). Peptide-level Robust Ridge Regression Improves Estimation, Sensitivity, and Specificity in Data-dependent Quantitative Label-free Shotgun Proteomics. Mol Cell Proteomics. 15(2):657-668. [2] Vanderaa, C., Vandenbulcke, S. and Clement, L. (2026). msqrob2book: Statistical analysis of mass spectrometry-based proteomics data. https://statomics.github.io/msqrob2book" +,,poster,Integration of Public Mass Spectrometry Data in Reproducible Metabolomics Data Analysis Workflows,"Gabriele Tomè,Ahlam Mentag,Philippine Louail,Johannes Rainer",,"Raw and processed data from metabolomics and lipidomics experiments can be deposited in public repositories such as MetaboLights, MassIVE or MetabolomicsWorkbench. In addition, various public metabolite annotation resources including spectral libraries exist, albeit without standardized metadata or a common format. The *Spectra* Bioconductor package provides a flexible infrastructure to handle and process mass spectrometry (MS) data from proteomics or metabolomics experiments. The strict separation of functionality for MS data handling, storage and representation from user-faced functions for data analysis facilitates expansion of *Spectra* to additional file types, data formats and resources. Dedicated backend implementations enable a direct access to MS data from public repositories simplifying hence integration of such data into reproducible data analysis workflows. Currently, the *MsBackendMetaboLights* Bioconductor package enables access to MS data from MetaboLights and analogous packages for data from MassIVE and MetabolomicsWorkbench are being developed as part of the project ""Data analysis infrastructure MetaRbolomics4Galaxy"". Similarly, the *MsBackendMassbank* package adds support and enables access to the small molecule annotation resource MassBank, and, combined with the *CompoundDb* Bioconductor package, allows creation of small, redistributable SQLite annotation databases. 
Support for additional annotation resources, provided through Figshare or Zenodo, or formats, such as mzSpecLib, will be added in future. Funding information: this work is co-funded by the Autonomous Province of Bolzano under the Joint Project *MetaRBolomics4Galaxy* (CUP: D53C25001030003)." +,,poster,Improving the detection of differential promoter-enhancer interactions in capture Hi-C data by sharing data between neighbouring restriction fragments,"Marco Geigges,Charlotte Soneson,Adwait Salvi,Filippo Rijli,Michael B Stadler",,"Capture Hi-C (CHi-C) is a sequencing-based method to study three-dimensional chromosomal interactions of pre-selected genomic regions, such as promoters, with all other genomic regions at the resolution of restriction fragments. This allows for the identification of long-range chromatin interactions between promoters and regulatory elements like enhancers that are crucial for understanding gene regulation and the spatial organization of the genome. Several bioinformatic tools are available for the quality control, the processing of sequencing reads and the identification of chromosomal interactions in CHi-C data. However, the detection of statistically significant differential interactions between conditions remains a challenge due to the sparsity of the data especially at long interaction distances. As the CHi-C signal at a given locus is usually not restricted to a single restriction fragment but spread over surrounding fragments, reads from several restriction fragments are often aggregated to increase power for differential signal detection. Usually, the reads from a fixed number of neighbouring fragments around each interacting fragment are pooled for this purpose. However, this approach does not consider the signal strength over background at the surrounding fragments and may lead to redundant use of data or suboptimal loss of resolution. 
To overcome this problem, we developed a new approach of aggregating CHi-C signal from neighbouring restriction fragments in a data-driven way. We focus on restriction fragments that contain a clear signal over background and use them to build a hierarchical tree of interacting fragments based on their genomic position. Differential interaction analysis is then performed on this tree-based representation of the data, striking a data-driven balance between increased statistical power and loss of resolution due to aggregation of concordant fragments. To interpret significantly differential interactions specifically identified by this tree-based method, we use ATAC-seq data from the same experimental conditions for comparison. Ensuring accessibility and reproducibility, we plan to integrate this new method into the Bioconductor framework. Our novel approach allows to reliably detect interaction differences between conditions when the signal at individual restriction fragments might not be strong enough. It thus helps to refine the interpretation of differential interactions between promoters and enhancers from any CHi-C data and can contribute to further understanding the role of distal regulatory elements in gene regulation." +,,poster,Benchmarking preprocessing of spatial omics data for pathway activity estimation,"Pere Moles-Seró,Andrew J Sedgewick,Justin Guinney,Robert Castelo",,"Gene Set Variation Analysis (GSVA) is a non-parametric, unsupervised method that estimates variation in gene set enrichment across samples within an expression dataset. By transforming data from a gene-by-sample matrix into a gene-set–by–sample matrix, GSVA enables pathway-level characterization at the individual sample level. GSVA has been extended to operate on SpatialExperiment objects, treating each spatial spot as an individual sample. 
In this poster, we present initial efforts to benchmark GSVA performance in the context of spatial transcriptomics data and to evaluate potential methodological adaptations that account for spatial information. To benchmark GSVA, we simulated genes with diverse spatial patterns and added random noise. We then constructed pathways composed of genes sharing similar spatial patterns, with the goal of assessing whether GSVA preserves a single spatial pattern that summarizes the spatial distributions of genes within each pathway. We evaluated the impact of different normalization strategies, including Scran and SpaNorm, as well as denoising and smoothing techniques such as Gaussian filters to address challenges such as missing or outlier spots. Our preliminary results indicate that selecting appropriate normalization and preprocessing techniques can improve the performance of GSVA in estimating pathway activity for spatial transcriptomics data." +,,poster,SingleMoleculeGenomicsIO - an efficient framework for reading and representing single molecule footprinting data in R,"Charlotte Soneson,Panagiotis Papasaikas,Hervé Pagès,Dimos Gaidatzis,Lisa Baumgartner,Nikolas Eggers,Simone Faravelli,Lukas Burger,Dirk Schübeler,Sebastien Smallwood,Michael B Stadler",,"In recent years, technological advances have driven the study of gene regulation and chromatin biology toward single-cell and single-molecule resolution assays. One such example is single molecule footprinting, where chromatin (DNA and bound proteins) is incubated with an enzyme to modify accessible nucleotides. The resulting modification patterns, providing a near base-resolution view of the chromatin accessibility, are then read out for individual molecules using either short-read or long-read sequencing. The resulting data poses some practical challenges for the analyst, for example stemming from the need to efficiently manage extensive individual read-level data in addition to summaries across reads. 
In this poster, we will describe SingleMoleculeGenomicsIO (https://github.com/fmicompbio/SingleMoleculeGenomicsIO), an infrastructure package providing capabilities for reading single molecule genomics data from most commonly used file formats, including modBam files generated by Oxford Nanopore and PacBio basecallers, modkit output files, and bam files where modifications are represented by means of sequence mismatches (such as those from bisulfite sequencing or deaminase-based footprinting protocols). Regardless of the raw data format, SingleMoleculeGenomicsIO consistently reads data into a SummarizedExperiment object, in which summary-level data is represented using regular matrices and read-level data is efficiently stored in a nested DataFrame. This ability to consistently and efficiently import data generated by several different platforms further enabled us to perform a systematic investigation and comparison of sequence-dependent biases in reported accessibility measures. We will describe the result of this investigation, focusing on adenine methylation readouts from both Oxford Nanopore and PacBio-generated data." +,,poster,gamdid: An R package for interpretable differential distributional analysis,"Lucas Beerland,Stijn Vandenbulcke,Christophe Vanderaa,Lieven Clement",,"Recent advancements in mass spectrometry based single cell proteomics (SCP) enabled the characterization of cellular heterogeneity across conditions at unprecedented resolution. However, current SCP data analysis workflows still focus on comparing average protein abundances and overlook informative distributional changes in shape, such as differences in variability and/or modality, limiting the advantage of SCP over bulk proteomics. We therefore propose gamdid: a novel statistical framework for SCP data to infer distributional differences between conditions, implemented in an R/Bioconductor package. 
gamdid builds on Lindsey’s Method, which recasts the density estimation into a Poisson regression problem i.e., by fitting smooth histograms with a large number of equally spaced bins using a basis function expansion. gamdid can prioritize proteins that exhibit differential distributions across conditions using Wald-tests. These tests assess the omnibus null hypothesis of a common density across conditions. Moreover, gamdid can test pairwise contrasts between the groupwise smoothers to infer regions with distributional differences between conditions. This also provides our users with intuitive plots that visualize the density estimators in both conditions, highlight regions with differential distributions, and have a one-to-one relation to the models and hypothesis tests. In this poster we walk our users step-by-step through gamdid’s functionalities and intuitive plots. We showcase how gamdid is a first step to leverage the wealth of information in SCP data, which offers a novel perspective to study single-cell heterogeneity and to compare the protein abundance distribution in populations of single cells that differ in cell type, biological conditions, or treatment." +,,poster,Benchmarking differential expression strategies for multi-sample scRNA-seq,"Anna Bortolato,Chiara Romualdi",,"Single-cell RNA sequencing (scRNA-seq) is widely used to study complex biological processes and human diseases, such as cancer, due to its higher resolution compared to bulk RNA-seq. However, scRNA-seq experiments often exhibit a hierarchical structure, with cells deriving from multiple individuals. In this scenario, cells coming from the same subject share genetic information and therefore are not statistically independent. A common approach to address intra-subject correlation is pseudo-bulk aggregation at the individual level, which comes at the cost of reducing the number of observations leading to a substantial loss of statistical power. 
Mixed-effects models offer a valid alternative, since they allow to retain single-cell resolution explicitly accounting for within-subject variability by specifying individuals as random effects. Yet, these models are computationally demanding and often unstable. Here, we systematically compared multiple methods for differential expression analysis across diverse scenarios in multi-sample scRNA-seq data, including cell-level tests ignoring intra-subject correlation, pseudo-bulk followed by standard bulk analysis, mixed-effects models, and non-parametric approaches such as hierarchical bootstrap. We introduce a novel mini-bulk approach, combining pseudo-bulk aggregation, bootstrap resampling, and mixed-effects modeling. Our results show that mini-bulk achieves comparable statistical power to single-cell mixed models, while dramatically reducing computational costs, providing an efficient and robust framework for differential expression analysis in multi-sample scRNA-seq experiments." +,,poster,motifTestR: A Bioconductor package for testing motif over-representation or positional bias,Stevie M Pederson,,"The package motifTestR offers the ability to test for motif enrichment or for any positional bias within a set of sequences, in a completely R-native manner. No additional installation of external tools or software is required, beyond standard Bioconductor dependencies. Results are returned in an easy to interpret and straightforward manner, able to be easily incorporated with complementary analyses using the Bioconductor ecosystem. All analytic methods are implemented to analyse individual Position Weight Matrices (PWMs), or by grouping highly similar PWMs into a cluster of motifs. Positional bias within a set of sequences is tested using fixed-width bins and a bin-specific binomial test, with a summarised p-value across all bins returned using the harmonic mean p-value. 
Overall enrichment testing can be performed using hypergeometric, poisson or quasipoisson models, or by non-parametric Monte Carlo methods. Background sequences can additionally be drawn from the genome to match the distribution of genomic features associated with any given set of sequences. Whilst enrichment testing can be computationally demanding, motifTestR works efficiently in a parallelised manner on an HPC, where large compute resources may be more accessible, or can be run on a local laptop if needed. Benchmarking against centrimo and ame from the MEME Suite shows highly comparable performance, both in terms of computational resources and results from statistical testing, but without the need for any external software." +,,poster,QFeatures: A Robust Infrastructure for Quantitative Proteomics in R/Bioconductor,"Léopold Guyot,Laurent Gatto,Christophe Vanderaa",,"Quantitative proteomics generates complex datasets that contain multiple data levels, from PSMs to peptides to proteins. QFeatures is an R/Bioconductor package designed to simplify the management, processing, exploration, and analysis of such datasets. At the heart of QFeatures is a data infrastructure based on quantitative sets and set links, that record the relationships between the sets. Each analysis step creates a new set linked to its parent(s), enabling transparent tracking of transformations, filtering and aggregations. This is particularly powerful for aggregating data from PSMs to peptides to proteins, as QFeatures allows researchers to trace and visualise intensity values across multiple levels, facilitating the exploration of the data in a fully reproducible manner. QFeatures is also designed to work seamlessly with other Bioconductor proteomics packages, such as msqrob2 for proteomics statistical modeling and scp for single-cell proteomics. 
By combining a robust infrastructure and interoperability with other packages, QFeatures allows researchers to handle quantitative proteomics datasets efficiently and reproducibly." +,,poster,Sovereign Genomic Identity: A Zero-Knowledge Architecture for Privacy-Preserving Risk Assessment,"Egor Ermolaev,Maryna Chepeleva",,"The expansion of personalized medicine poses a critical challenge in data privacy. Institutions such as insurance companies, clinical trial recruiters, and employers increasingly seek to utilize genomic insights for risk assessment. However, the current requirement to share raw genomic data (e.g., VCF files) forces individuals to expose their entire genetic code to verify a single trait. This ""all-or-nothing"" disclosure model creates unacceptable liabilities regarding privacy, re-identification, and data leakage, directly conflicting with the data minimization principles of GDPR. To circumvent these systemic risks, the industry requires a technological framework that enables the secure verification of biological properties without the transmission of the underlying biological data. In this work, we propose a paradigm shift from data exchange to proof exchange. We present a protocol for ""Sovereign Genomic Identity,"" powered by Zero-Knowledge Proofs (ZKP). This architecture allows for the reliable verification of biological properties while the raw data remains exclusively in the user's custody. Our proposed architecture establishes a certified clinical laboratory as a cryptographic Trust Anchor. Instead of retaining sensitive patient data, the laboratory issues a digital signature over the user's VCF file, which is then transferred to the user's local secure environment (e.g., a smartphone wallet). When a third party requires a specific genomic insight - such as confirming that a Polygenic Risk Score falls within a specific low-risk range - the computation is executed entirely on the client side. 
We utilize zk-SNARK arithmetic circuits to generate a succinct proof. This proof mathematically attests that the risk score was calculated correctly according to a public model using a valid, untampered VCF signed by the Trust Anchor, without ever revealing the specific variants or the exact score to the verifier. By decoupling the validity of the computation from the visibility of the data, we demonstrate a practical pathway to ""Compute-over-Data."" This approach resolves the tension between analytical precision and privacy, enabling a new standard of digital sovereignty where individuals can utilize their genomic insights without compromising ownership." +,,poster,The iSEE package ecosystem - Extending data exploration and visualization,"Federico Marini,Kevin Christophe Rue-Albrecht,Charlotte Soneson,Aaron Lun",,"The iSEE (Interactive SummarizedExperiment Explorer) Bioconductor package is a universal interface to explore and visualize any object stored as a SummarizedExperiment. This enables iSEE to work seamlessly with bulk and single-cell transcriptomics data, but also proteomics, spatial transcriptomics and many other data types, not only in the early stages of a project, but also to accompany publications as interactive web applications. While the main functionality and many general features of iSEE are known to the Bioconductor community, there is an ever expanding universe of packages that extend and enhance iSEE, contributed by the iSEE core team and by external developers. Our work will report on known extension packages such as iSEEde, iSEEpathways, iSEEindex, iSEEfier, and iSEEtree that altogether complement the essential features of iSEE, putting a spotlight on some use cases that are relevant to the members of the Bioconductor community." +,,poster,Spatial Deconvolution and Interaction Analysis in Complex Tissue Models,"Maryna Chepeleva,Emma Rigg,Frits Thorsen,Petr V. 
Nazarov",,"Spatial transcriptomics maps gene expression on tissue architecture, yet the analysis is frequently confounded by ""mixed spots"" containing signals from multiple cell types and, in xenograft settings, transcripts from different species. Dissecting these signals is crucial for understanding tumor-microenvironment interaction. In this work, we explore signal decomposition strategies to resolve spatial heterogeneity and identify localized biological programs in patient samples and xenograft experiments. Using whole-tissue slides from human melanoma samples, we demonstrate that data-driven signal deconvolution performed by consICA (consensus Independent Component Analysis) captures spatially resolved gradients of activity. Unlike discrete clustering, this approach treats the tissue as a continuum of overlapping signals, allowing for a more granular representation of tissue architecture. We successfully isolated distinct signatures, such as localized immune infiltration and angiogenic activity, validating their biological relevance through functional enrichment analysis of the top-contributing genes. We further apply this deconvolution approach to an animal model where human melanoma brain metastasis cell lines are injected intracardially into NOD/SCID mice. This experimental setup is characterized by the spatial co-occurrence of host and graft transcripts in the mouse brain with dual-genome alignment. Decomposition enables the separation of species-associated expression programs, allowing for downstream analysis of spatial host-tumor relationships and their perturbation by external factors (e.g., extracellular vesicle treatments) as shifts in component activity across the tumor-brain interface. Our findings suggest that decomposition-based methods are particularly well-suited for datasets in which biological boundaries are poorly defined or cross-species signals interfere. 
Robust signal separation improves interpretability of spatial omics in challenging experimental designs and provides a compact basis for comparative spatial analyses across cohorts and perturbations." +,,poster,A survey of functional overlap and design patterns in the SummarizedExperiment ecosystem,"Geraldson T. Muluh,Aki Havulinna,Leo M Lahti",,"The SummarizedExperiment class has become the standard for storing microbiome and metabolome data within Bioconductor. While this shared data structure theoretically allows users to easily combine tools, it has also led to a fragmented software landscape in which multiple packages implement identical statistical or visualization methods. This redundancy creates confusion for analysts and increases developers' maintenance burden. This work surveys the functional overlap between the mia family (mia, miaViz, miaTime), the metabolomics package notame, and the external toolkit MicrobiomeStat. We systematically categorize functions related to differential abundance, diversity, and ordination to identify direct duplicates. Furthermore, we evaluate how each framework handles complex study designs, specifically focusing on the implementation of longitudinal time-series and paired-sample analyses. By documenting these redundancies and capabilities, this survey aims to guide future development efforts to refactor these packages, ultimately moving towards a more modular, less redundant ecosystem for multi-omics analysis." +,,poster,"Tiny RNAs, Huge Responsibility: A Bioinformatic Tale of Paternal Inheritance","Alessandro Gozzo,Signe Isacson,Lovisa Örkenby Kämpe,Anna Asratian,Unn Kugelberg,Anita Öst",,"The paternal germline serves as a repository of intricate genetic information contributing to offspring development and fitness. In addition to genomic DNA, spermatozoa carry diverse classes of small non-coding RNA (sncRNA) that undergo dynamic remodelling during spermatogenesis, producing a highly specific mature sperm RNA profile. 
Among these, mitochondrial-derived small non-coding RNAs (mitosRNA) remain poorly characterized despite their abundance in the male gonads. Using Seqpac, an R package implementing a sequence-based workflow for sncRNA analysis, we computationally resolved the composition and origin of Drosophila sperm mitosRNA. Our analysis revealed most mitosRNA to map to piRNAs, share characteristic length distributions and nucleotide biases, and exhibit Argonaute-dependent biogenesis signatures. Furthermore, integrative transcriptomic analyses demonstrated that perturbation of germline Piwi and Aubergine results in coordinated metabolic pathway shifts in testes and in offspring embryos. This study demonstrates how sequence-centric bioinformatic frameworks such as Seqpac enable high-resolution interrogation of complex sncRNA populations and facilitate the discovery of intergenerational regulatory signals. By providing a reproducible, flexible, and scalable workflow for sncRNA profiling, Seqpac advances our capacity to address intricate questions of inheritance and transcription regulation." +,,poster,The Role of the Gut Microbiome in Depressive Symptoms: Systems Ecology-Informed Harmonization and Meta-Analysis across Eight Cohorts,"Eugenia E. Natasha,André G. Uitterlinden,Anja Lok,Robert Kraaij,Thomaz F.S. Bastiaanssen",,"Background: The involvement of gut microbiota-brain axis in depressive symptoms has emphasized the importance of advancing investigations of the role of gut microbiota. Currently, mechanistic links between gut microbial ecology and depressive symptoms remain underexplored, partially due to difficulties in identifying robust signals across heterogeneous population cohorts and microbiome assays. This is exacerbated by the statistical properties of microbiome data (e.g., compositional and zero-inflated). 
Objective: (1) Identify gut microbial features robustly linked to depressive symptoms; (2) Develop a harmonized and reproducible (meta-)analysis workflow compatible with Bioconductor infrastructures to support robust cross-cohort inferences. Methods: We will conduct a harmonized individual participant data meta-analysis of eight large-scale population studies (N = 23,344) from Europe and the USA. Given the scale of the data and heterogeneity, analysis steps will be done within a streamlined and well-documented workflow to ensure reproducibility. Taxonomic profiles will be estimated from 16S rRNA and/or shotgun metagenomic reads by mapping sequences to the Greengenes2 database (v2024.09), allowing phylogeny-based harmonization across both techniques. Downstream processing is performed in R using Bioconductor packages, namely mia and microbiome. To address compositionality, per-sample CLR transformation utilizing the same packages will be applied prior to association tests. For cohorts with shotgun data, functional genes will be used to derive metabolic pathway and gut-brain module abundances to capture higher-resolution ecological features. Depressive symptoms are quantified using standardized and validated instruments (CES-D or PHQ-9). Within each cohort, associations will be estimated using linear regression to derive summary statistics, and these cohort-level summary statistics will be synthesized using weighted meta-analyses. All statistical analyses will be written and performed in R using reproducible Bioconductor infrastructures, including the TreeSummarizedExperiment and MultiAssayExperiment data containers. Results: 16S rRNA sequencing reads from one cohort have been analyzed using the developed workflow. Summary statistics are currently being computed along the way while developing the downstream part of the workflow. 
Our approach to harmonize cross-cohort analyses using a well-documented workflow as a tool will help us arrive at better mechanistic insights of gut microbial ecology and its link to depressive symptoms." +,,poster,EMMA: Enrichment Methods Matter for achieving fully Reproducible Pathway Analysis,"Najla Abassi,Annekathrin Silvia Nedwed,Federico Marini",,"Functional Enrichment Analysis (FEA) is a key downstream step in omics data analysis, commonly applied after differential expression analysis to generate pathway-level hypotheses and support biological interpretation. The growing number of tools and methods for performing FEA has led to substantial heterogeneity in both analytical choices and reported results. Wijesooriya et al. (2022) (doi: 10.1371/journal.pcbi.1009935) showed that, despite the popularity of FEAs, methodological issues (e.g. the inappropriate background gene definitions, or the lack of detail provided in the Materials and Methods section) can compromise the validity and reproducibility of many research endeavors that apply such methods without adequate specification. To date, however, no enforced reporting standard exists to ensure transparent and reproducible documentation of FEA workflows. To address this gap, we introduce EMMA, a framework designed to automatically allow the recording of key analytical parameters and settings during the generation and reporting of FEA results. Implemented as an R/Bioconductor package, EMMA enables the execution of FEAs using existing tools (e.g. clusterProfiler, topGO, Enrichr, gprofiler2) while systematically capturing analysis parameters and provenance information during runtime, and returning enrichment results in their standard format alongside structured and reusable metadata. 
By coupling execution with structured metadata collection, EMMA aims to simplify the (re-)execution of enrichment workflows, promote correct and transparent use of widely adopted methods, and facilitate comprehensive reporting of FEA results in scientific reports or manuscripts. EMMA is publicly available in its development version on GitHub under the MIT license at https://github.com/imbeimainz/EMMA" +,,poster,Approaches for disease modeling and precision medicine with human in vitro neuronal models integrating multimodal data analysis,"Oskari Kulta,Susanna Narkilahti",,"Human in vitro neuronal models are recognized as valid tools to bridge the translational gap between preclinical studies and human neurological disease. Yet their predictive power depends on physiologically relevant cellular organization and maturation, multimodal data collection and integration, and advanced computational analysis. Here, we present a set of human pluripotent stem cell (hPSC)–derived in vitro platforms spanning central and peripheral nervous system neurons, innervated multicellular systems, and organ-on-chip (OoC) architectures, with the aim of generating human-relevant, data-rich disease models and developing comprehensive multimodal analysis strategies. By combining OoC technology, molecular biology methods, multielectrode array (MEA) electrophysiology, and advanced MEA signal-processing pipelines, we characterize spontaneous and induced neuronal network dynamics across health and disease, including genetic epilepsies such as Dravet syndrome (DS), chemically induced seizure models, stroke-like perturbations, neurodegenerative diseases such as Parkinson disease (PD), brain cancer and innervation of peripheral tissues. 
In patient derived cortical networks modeling DS, we observed variant-specific and clinically relevant alterations in excitatory–inhibitory balance, heightened network excitability, and increased sensitivity to seizure-like activity [1], while pharmacological induction with kainic acid produced reproducible functional fingerprints aligned with established seizure models [2]. Brain-on-chip and seizure-on-chip systems enable spatially organized, axonally connected network circuitry models in which seizure initiation, propagation, and synchronization [3] as well as early phases of alpha synuclein propagation in PD [4] can be studied, revealing divergent functional behaviors and connectivity changes across networks related to different disease mechanisms. In parallel, we have developed and validated advanced synchronization and functional connectivity analyses to capture multi-level circuit dynamics not accessible with conventional MEA analysis tools [5,6]. To model tumor–CNS interactions, we are developing a 3D glioblastoma brain-on-chip platform integrating tumor spheroids into pre-existing neuronal and vascular networks [7,9]. Furthermore, we aim to create innervation models of peripheral organs to enable disease studies at a systemic, multi-tissue level. As proof-of-concept, we have reported successful cocultures of neuro-cardiac [8], neuro-vascular [9], and neuro-adipose [10] setups revealing positive net effect of innervation in target cell type maturation. We generate and functionally characterize hPSC-derived peripheral neurons (Kulta et al, manuscript in submission), which exhibit robust spontaneous activity, distinct electrophysiological maturation profiles, neurotransmitter secretion, and pharmacological responsiveness. They have been successfully integrated into innervated organ-on-chip systems, including neuro-cardiac (Pesu et al, manuscript in submission) multicultures and utilized in 3D bioprinting axonal guidance model [11]. 
Across these platforms, electrophysiological phenotyping is complemented by advanced image analysis and transcriptomic profiling has been initiated. We publicly share large MEA datasets, 2TB, raw recordings, extracted features, and analysis code to support reproducibility and computational method development [6]. Together, our results demonstrate that advanced human in vitro models combined with MEA electrophysiology yield disease-specific network-level functional fingerprints. For more effective exploitation of our in vitro platforms and datasets, a stronger integration with omics-level analysis is required. Thus, we actively seek experts of bioinformatics, signal processing and computational modeling to co-develop multimodal connectomics approaches that integrate electrophysiological phenotypes with genomic and transcriptomic data, enabling more powerful, patient-specific in vitro disease models and advancing precision medicine applications. [1] Mzezewa et al., 2025, Neurosci Res. https://doi.org/10.1016/j.neures.2025.104958 [2] Isosaari & Kulta et. al., 2025, Neuroscience. https://doi.org/10.1016/j.neuroscience.2025.11.033 [3] Pelkonen et al., 2020, Biosens Bioelectron. https://doi.org/10.1016/j.bios.2020.112553 [4] Kapucu et al., 2024, NPJ Parkinsons Dis. https://doi.org/10.1038/s41531-024-00750-x [5] Vinogradov et al., 2024, eNeuro. https://doi.org/10.1523/ENEURO.0035-24.2024 [6] Kapucu, F.E., et al., 2022, https://doi.org/10.1038/s41597-022-01242-4 [7] Förster, N. 
& Isosaari, L., et al., 2025, https://doi.org/10.1096/fj.202500291RR [8] Häkli & Jäntti et al., 2022, https://doi.org/10.3390/ijms23063148 [9] Isosaari, L., et al., 2023, https://doi.org/10.1186/s12964-023-01159-4 [10] Saarimaa, S., et al., 2025, https://doi.org/10.1186/s12964-025-02544-x [11] Honkamäki L, et al., 2025, https://doi.org/10.1002/adhm.202402504" +,,poster,Seqpac - sequence-based analysis of small RNA in R,"Alessandro Gozzo,Anna Asratian,Lovisa Örkenby Kämpe,Signe Isacson",,"RNA-sequencing has evolved into a major cornerstone of modern molecular biology with a myriad of excellent tools for its preprocessing, annotation and analysis. Along with this, laboratory techniques for sequencing of small non-coding RNA (sRNA) have developed however, with fewer options of analytical pipelines available. As sRNA are short, non-coding and come in many different classes, special consideration needs to be taken to these aspects of sRNA analysis. We thus developed Seqpac - an R package containing the full workflow from raw fastq file, through normalisation and annotation to analyses and graphs. The base of the package is the retention of the original sequence and the PAC object - an S4 object containing metadata within Pheno, mapping and other information in Anno and raw counts in Counts. The retention of the sequences enables a feature-agnostic workflow, well-suited for the analysis of small RNAs. Seqpac has been on Bioconductor since 2023." +,,poster,From sperm sncRNA to clinical context: integrating sncRNA-seq and complex IVF metadata,"Lovisa Örkenby Kämpe,Kajsa Karlsson,Alessandro Gozzo,Anna Asratian,Anita Öst",,"Involuntary childlessness affects approximately one in six couples attempting to conceive, and many seek treatment through in vitro fertilization (IVF) which is currently the most effective method for assisted fertilization. Despite ongoing improvements, fewer than 40% of IVF cycles result in a successful pregnancy. 
While most optimization efforts have focused on female-related factors, male factors remain under-addressed and are typically limited to conventional semen parameters with poor predictive value for IVF outcomes. Emerging evidence highlights the diagnostic potential of sperm-borne small non-coding RNAs (sncRNAs) to predict sperm and embryo quality, and that their expression is responsive to lifestyle and diet. These properties open new opportunities to predict IVF outcomes and identify modifiable male factors. In this ongoing study, we analyse sperm sncRNA-seq data from 72 patients undergoing IVF treatment, integrated with detailed clinical and lifestyle metadata obtained in collaboration with the reproduction medicine centre in Linköping, Sweden. To address the high dimensionality and diversity of sncRNA expression, sequence integrity is preserved using the Bioconductor package seqpac, and co-expression patterns are characterized using Weighted Gene Correlation Network Analysis (WGCNA). Module eigengenes are subsequently integrated with clinical covariates in regularized regression models to explore associations between sncRNA expression patterns and IVF outcomes. This study integrates complex clinical and lifestyle metadata into detailed sncRNA seq analysis by incorporating multivariate metadata integration to the seqpac workflow. It further aims to establish an explainable and scalable analytical framework for using sperm sncRNA profiles in the assessment of male fertility and IVF outcome prediction, providing a foundation for future validation and evaluation." +,,poster,An automated framework for mapping tumour transition regions across anatomical sites using single-cell transcriptomics,"Nicolò Gnoato,Laura Masatti,Chiara Romualdi,Enrica Calura",,"Cancer is a complex and evolving disease, shaped by ongoing genetic and transcriptional diversification that generates heterogeneous tumour cell populations within the same patient. 
This heterogeneity fuels adaptation under selective pressures such as immune surveillance and therapy, and can facilitate tumour dissemination – where malignant cells migrate between anatomical sites, survive new microenvironments, and seed secondary lesions. “In-transition” tumour cells are widely implicated in metastatic progression and relapse, and their ability to evade or withstand treatment often leads to therapy failure. Because these cells are typically rare and transient, they are difficult to capture and characterise with conventional bulk approaches. Single-cell RNA sequencing offers the resolution needed to study dissemination-related states at the level of individual cells, but it also requires computational strategies that can reliably detect transition-associated tumour populations and identify the biological programs that support their survival and spread. In this project, we are developing an automated bioinformatics pipeline designed to identify tumour transition regions from scRNA-seq data and to characterise their pathway-level signatures. The workflow focuses on the tumour compartment and integrates complementary information to detect where transitions are most likely to occur. First, it leverages the local neighbourhood structure among tumour cells to define candidate transition regions. Second, it incorporates anatomical site annotations to highlight connected tumour regions where site composition shifts across neighbouring states. Third, it uses inferred subclonal structure from copy-number-derived subclone assignments, to prioritise patterns consistent with cross-site clonal migration rather than unspecific mixing. Candidate transitions are then supported using statistical testing to prioritise the most robust signals. The pipeline outputs (i) a ranked set of transition regions and their associated tumour cells, and (ii) the corresponding pathway enrichment results contrasting transition cells with non-transition tumour populations. 
This framework provides a scalable and reproducible approach to map dissemination-associated tumour states and to highlight pathways potentially underlying metastatic competence and therapy resistance. By clarifying the molecular programs active in tumour cells during transition, this approach can help guide mechanistic studies and, in the longer term, support the identification of vulnerabilities that may be used to limit metastatic spread and improve treatment response." +,,poster,Spatial transcriptomics approaches to uncover the complexity of cancer cachexia,"Emma Menna,Valerio Reffo,Giuseppe Persico,Camilla Pezzini,Roberta Sartori,Marco Sandri,Davide Risso",,"Visium HD is the evolution of the 10x Genomics Visium platform that allows the spatial characterization of tissue samples at a subcellular resolution. One advantage of this technique is that it can be complemented by standard H&E staining, allowing nuclei segmentation and integration of imaging and transcriptomic data. Here, we present a study in which we applied Visium HD to Cancer Cachexia, a multifactorial syndrome characterized by severe catabolism of skeletal muscle and adipose tissue and a common feature of solid cancers. The frailty experienced by cachectic patients increases morbidity, reduces tolerance and responsiveness to anti-cancer treatments, complicates patient management and accounts for up to 30% of cancer deaths. As the underlying mechanisms of this syndrome are incompletely defined, effective therapeutics have yet to be developed. In this work, we analyze 24 samples of murine muscle tissue from healthy, cachectic and treated conditions, with the aim of gaining further insight into the cachectic signature and the potential effect of multitarget RNA-based therapies. To maximize the exploitation of the available data, two complementary analysis pipelines were implemented. The first considers spatial spots as analytical units, while the second focuses on individual nuclei. 
Spots are defined by applying a uniform 16 µm grid to the tissue; although each spot may capture transcripts from multiple cells, it preserves the full molecular signal and enables more effective use of spatial context. On the contrary, nuclei segmentation isolates transcripts at the single-cell level, allowing more cell-specific analyses but at the cost of losing cytoplasmic information. This specific biological context presents specific analytical challenges related to the nature of muscle tissue and of cachexia, requiring careful benchmarking and adaptation of conventional spatial transcriptomics methods, many of which do not fully capture the biological complexity of these samples. Particular emphasis is placed on cell type annotation, the analysis of spatial co-localization and co-expression patterns, and the inference of cell–cell communication." +,,poster,Time-stratified analysis of gut microbiome and latent language profiles in FinnBrain birth cohort: a reproducible Bioconductor workflow,"Nitin Bayal,Rasmus Hindström,Anna-Katariina Aatsinki,Laura Perasto,Heidi Isokääntä,Eveliina Munukka,Essi Saloranta,Elina Mainela-Arnold,Hasse Karlsson,Linnea Karlsson,Leo M Lahti",,"$$Background$$ Modelling associations between cognitive development and neurobiological processes requires reproducible analytical workflows capable of integrating gut microbiome data with latent language profiles. While altered gut microbiota has been associated with neurodevelopmental and psychiatric outcomes, longitudinal evidence connecting microbiome trajectories to latent language profiles (LLP) remains limited. Microbiome data are inherently high-dimensional, sparse, compositional, and hierarchically structured, requiring robust statistical frameworks and reproducible computational ecosystems. The open-source Bioconductor environment provides standardised data containers and validated methods tailored for complex multi-omics analyses. 
$$Objective$$ To investigate associations between gut microbiome composition, diversity, and developmental trajectories of latent language profiles from infancy to 5 years of age, using reproducible workflows implemented in Bioconductor and R statistical programming language. $$Methods$$ Faecal metagenomic samples from the FinnBrain (n = 969 samples across four timepoints: 6 months, 14 months, 30 months, and 5 years) were analysed. Latent language profiles were derived as three classes (persistent low, stable average and stable high). Data were structured using the TreeSummarizedExperiment container to integrate hierarchical taxonomic information with sample metadata and analysed using R packages. Community composition was analysed at different hierarchies by grouping taxa and normalising their relative abundances. Alpha diversity (Shannon index) was calculated from relative abundance matrices and compared across timepoints and LLP classes using Kruskal Wallis tests with effect size estimation. Beta diversity patterns were assessed using robust Aitchison distance-based ordination and multivariate testing. Covariate dependency and model stability were evaluated via Spearman correlations, Cramér’s V, and variance inflation factors to ensure robust multivariable modelling. All analyses were conducted within the Bioconductor ecosystem using R packages from data wrangling to compositional data handling, diversity analysis, and reproducible Quarto-based reporting in R. $$Results$$ Shannon diversity confirmed progressive microbiome maturation across time points. Community composition analyses revealed age-dependent restructuring of dominant phyla and genera. 
Group differences were most apparent at 6 months, where the persistent low LLP group showed relatively higher Proteobacteria and lower Bacteroidetes; communities at this stage were dominated by Bifidobacterium with high inter-individual variability, whereas by 5 years, LLP groups converged toward more stable taxa, including Faecalibacterium, Bacteroides, Phocaeicola, and Roseburia. Tests for multicollinearity indicated that the regression models were stable, allowing reliable follow-up analyses. $$Conclusion$$ These findings suggest that while gut microbiome maturation is strongly time-dependent, associations with latent language profiles are subtle and potentially restricted to early developmental windows of infancy. By leveraging standardised TreeSummarizedExperiment data structures and compositional-aware workflows within the Bioconductor ecosystem, we ensured consistent taxonomic aggregation, reproducible diversity estimation and multivariable modelling. This R/Bioconductor framework supports transparent and reproducible longitudinal microbiome analyses in FinnBrain and can be extended to other gut-brain axis cohort studies." +,,poster,GeneScout: de novo discovery of small open reading frames using information-theoretic codon entropy,Dany Mukesha,,"Small open reading frames (sORFs) encode a growing class of biologically active microproteins that are frequently missed by conventional genome annotation pipelines due to their short length, weak sequence conservation, and frequent localization within regions annotated as non-coding. While experimental approaches such as ribosome profiling have revealed widespread translation of sORFs, there remains a need for annotation-independent computational methods to systematically explore hidden coding potential in genomic DNA. 
We present GeneScout, an open-source R package intended to be compatible with the Bioconductor ecosystem for de novo discovery and prioritization of sORFs using information-theoretic properties of codon usage. GeneScout applies sliding-window analysis to genomic sequences, computing Shannon entropy over codon frequency distributions and Kullback–Leibler divergence relative to organism-specific reference codon usage profiles. Protein-coding regions exhibit constrained codon usage shaped by translational and evolutionary pressures, leading to reduced entropy and characteristic divergence patterns relative to non-coding sequence. Candidate regions exhibiting low entropy and low divergence are merged and scanned for start and stop codons to extract sORFs within user-defined length constraints. Candidates are scored and ranked using a composite metric integrating entropy, divergence, and length, providing a transparent prioritization framework rather than definitive gene calls. GeneScout integrates with Bioconductor infrastructure, supports chromosome-scale analyses, and provides visualization of genome-wide entropy landscapes. GeneScout is designed to complement existing annotation and proteogenomic pipelines by highlighting regions of elevated coding potential for downstream validation using ribosome profiling, evolutionary conservation, or mass spectrometry. By focusing on universal statistical properties of translation, GeneScout enables systematic exploration of sORFs in both well-annotated and poorly annotated genomes." +,,poster,mitology: an R package for dissecting mitochondrial activity from transcriptomic data,"Stefania Pirrotta,Massimo Bonora,Enrica Calura",,"Background: Mitochondria are key organelles involved in a vast range of biological processes, including ATP synthesis, lipid metabolism, calcium homeostasis, cell death, and inflammation. 
While impaired mitochondrial function is linked to various complex diseases, a dedicated tool for large-scale mitochondrial pathway analysis has been lacking. To fill this gap, we developed mitology, an R package designed to infer and dissect mitochondrial activity from gene expression data. Methods: We integrated gene collections from mitochondrial-specific databases (MitoCarta 3.0, IMPI, and MSeqDR) and Gene Ontology (GO), resulting in a list of 2,996 mitochondrial genes. We then leveraged MitoCarta 3.0, Reactome, and GO hierarchies to re-organize these genes into optimized sets, focusing on mitochondrial-relevant processes while minimizing redundancy. The package allows for enrichment analysis and scoring to study mitochondrial functions at various levels of specificity. Application to Case Studies: The effectiveness of mitology is demonstrated through its application to different transcriptomic contexts, specifically focusing on the role of mitochondria during cell differentiation. Firstly, in a single-cell RNA-seq study of hematopoietic stem cells (HSCs), the tool successfully identified significant mitochondrial remodeling during ex-vivo expansion. Specifically, mitology pinpointed an increase in OXPHOS subunits and ROS metabolism pathways as key drivers of the transition toward a primitive HSC-like state. Furthermore, the tool is being applied to spatial transcriptomics data to investigate how mitochondrial activity correlates with cellular differentiation and spatial organization. In both scenarios, mitology proves its ability to extract meaningful biological insights and characterize the mitochondrial involvement in cell fate determination across different data types. Conclusions: mitology provides a robust and versatile framework for studying transcriptomes through a mitochondrial lens. 
By effectively filtering and organizing mitochondrial pathways, the tool facilitates the discovery of organelle-specific mechanisms across bulk, single-cell, and spatial biological systems. mitology is available as an R package on Bioconductor." +,,poster,A model-based simulation framework for Single Cell Proteomics data,"Luca De Corso,Christophe Vanderaa,Alexandre Segers,Davide Risso,Lieven Clement",,"Single-cell proteomics (SCP) data generated by mass spectrometry (MS) are characterized by complex experimental designs, strong correlation structures, and pervasive missing values arising from both technical and biological mechanisms. These characteristics pose significant challenges for statistical modeling and make the generation of realistic synthetic data difficult. Hence, high quality simulation-based datasets are lacking to benchmark the quality of current and novel data analysis workflows for SCP. In this contribution, we develop a model-based factorization approach to simulate new datasets that preserve the main empirical properties of real SCP data. The model consists of two components, a logistic regression component to model presence absence and a gaussian component modeling the observed log2 MS intensities. The two specifications provide complementary information, capturing both structured patterns of missing values and variability in MS intensity. The quality of the simulated data is evaluated through a benchmarking procedure based on repeated cell-wise train/test splits and replicate simulations [1]. Feature- and cell-level summary metrics are compared between simulated and reference data using the Kolmogorov–Smirnov statistic and the 2-Wasserstein distance. The results show that the proposed framework successfully reproduces key distributional properties and correlation structures of the reference data. Hence, it provides a promising basis for developing a flexible and principled tool for generating realistic SCP data. [1] Crowell, H. L., Leonardo, S. 
X. M., Soneson, C. & Robinson, M. D. (2023). The shaky foundations of simulating single-cell rna sequencing data. Genome Biology 24, 62." +,,poster,Unsupervised Identification of Spatial Niches in Spatial Transcriptomics Data,"Ewoud De Troyer,Alemu Takele Assefa,Koen Van den Berge,Xuechen Wang,Bie Verbist,Helena Geys,Maryam Shojaei Shahrokhabadi",,"**Unsupervised Identification of Spatial Niches in Spatial Transcriptomics Data** Ewoud De Troyer$^1$, Maryam Shokaei Shahrokhabadi$^2$, Alemu takele Assefa$^1$, Koen Van den Berge$^1$, Xuechen Wang$^1$, Bie Verbist$^1$, Helena Geys$^1$ $^1$ Statistics and Decision Sciences, Johnson and Johnson Innovative Medicine, Beerse, Belgium; $^2$ Data Science Institute, Hasselt University, Hasselt, Belgium Clustering single-cell spatial omics data in spatial niches is valuable for identifying distinct cellular microenvironments within tissues. We define a spatial niche as a collection of spatially coherent cells with a similar cell population composition or homogeneous gene expression profiles. Such spatial niches can help reveal spatial organization and interactions, providing insights into tissue function, development, and disease mechanisms. Many methods for spatial clustering have been developed in recent years (Yuan et al. 2024 [1], Singhal et al. 2024 [2], ... ), though most methods seem to rely on proper annotation of the cells, which is a non-trivial exercise. We will apply a straight-forward approach to do spatial clustering. The clustering approach is a combination of the approaches described in Liu et al. 2025 [3] and Ding et al. 2025 [4] and allows the unsupervised identification of spatial niches directly from the gene expression across multiple samples. Similar as in existing benchmark and method papers, we apply the clustering approach to some common public data sets and use quantitative evaluation metrics to compare with existing methods. [1] Yuan, Z., Zhao, F., Lin, S. et al. 
Benchmarking spatial clustering methods with spatially resolved transcriptomics data. Nat Methods 21, 712–722 (2024). https://doi.org/10.1038/s41592-024-02215-8 [2] Singhal, V., Chou, N., Lee, J. et al. BANKSY unifies cell typing and tissue domain segmentation for scalable spatial omics data analysis. Nat Genet 56, 431–441 (2024). https://doi.org/10.1038/s41588-024-01664-3 [3] Liu, N., Martin, J., Bhuva, D. et al. hoodscanR: profiling single-cell neighborhoods in spatial transcriptomics data, Preprint (2025). bioRxiv 2024.03.26.586902; doi: https://doi.org/10.1101/2024.03.26.586902 [4] Ding, D.Y., Tang, Z., Zhu, B. et al. Quantitative characterization of tissue states using multiomics and ecological spatial analysis. Nat Genet 57, 910–921 (2025). https://doi.org/10.1038/s41588-025-02119-z" +,,poster,Computationally exploring different RNA decoration ratios and compositions from ONT RNA-seq data in biological pathways,"Anna C E De Lima Tanada,Paolo Martini,Enrica Calura",,"RNA decorations play a crucial role in all RNA processes, and multiple studies show their implications in pathologies such as cancer. So far, over 150 RNA modifications have been identified, which can be present in any RNA species. Despite the development of new technologies, such as Oxford Nanopore direct RNA-seq (ONT RNA-seq) and advances in computational biology, studies usually focus on a single type and its modifiers. This is mainly due to the difficulty of accurately detecting RNA decorations. However, in doing so, we lose the complexity of the epitranscriptome as a whole, like the co-influence of the modifications. Additionally, finer details of their biological role can be lost, especially in complex diseases such as cancer. In this context, a whole level of complexity is often neglected: the biological pathways. Studying a single modification type and its responsible enzymes only scratches the surface of their mechanism. Thus, more holistic approaches should be considered. 
This allows questions such as whether different pathways are more frequently modified or whether certain compositions are preferred according to biological function. Therefore, a comprehensive analysis can be paramount in gaining insights into new biomarkers and personalized cancer therapy. This then motivated us to computationally explore different RNA decorations in an integrated, pathway-level fashion. First, we detect the different modification types in ONT RNA-seq samples using computational tools. These tools provide the modification rate for each transcript site. Next, we format the data to obtain the average modification frequency per site and per sample, along with the transcript ID and any associated genes. After gathering this information, we assign the transcripts to their respective biological pathways. This allows us to analyze the ratio of the modification types in each pathway. We later validate our findings using statistical analyses. Through this process, we expect to identify distinct RNA decoration compositions and overall modification rates across pathways. These differences may underlie specific regulatory or disease-related processes. Additionally, our pathway-level approach can be used to compare RNA decorations under different conditions. This method could help identify biomarkers for tumor therapy and assist in classifying molecular subtypes of cancer samples." +,,poster,Spatial Transcriptomics Enables Navigation of the Tumor Microenvironment and Evaluation of Tumor Accessibility for Robust Drug Target Prioritization,"Alemu Takele Assefa,Ewoud De Troyer,Koen Van den Berge,Bie Verbist,Xuechen Wang,Helena Geys",,"Immunotherapy-based drug discovery in oncology requires precise target selection that is both biologically compelling and clinically tractable. 
Spatial transcriptomics (ST) adds a critical dimension to this process by resolving gene expression within intact tissue architecture, enabling direct navigation of the tumor microenvironment (TME). We outline an ST-driven framework for target identification and prioritization that jointly evaluates (i) tumor cell targetability and (ii) microenvironmental context to identify and evaluate optimal design of Immunotherapy. First, candidate targets are assessed for tumor-selective—i.e., expression on the majority of malignant cells with minimal signal in stromal and healthy compartments. Second, ST quantifies spatial uniformity of target expression across the tissue, ensuring consistency across tumor regions and reducing the risk of regional escape. Third, neighborhood and distance-based ST analyses define the spatial relationships between target-positive tumor cells and immune subsets, characterizing patterns of infiltration and exclusion. These spatial features guide the evaluation and identification of optimal drug target and the choice of optimal engager modality and effector axis (e.g., T- or NK-cell engagement) by matching target distribution with the proximity and abundance of relevant effector cells. Collectively, this integrative approach provides actionable evidence to (1) confirm regionally consistent on-tumor expression, and (2) align immune-engagement strategy with local TME constraints. By coupling target biology with spatial context, ST enables robust, data-driven prioritization that optimize drug target identification and increases the likelihood of clinical impact." +,,poster,Population-scale quantification of microRNA-binding disruptions at the $APOE$ locus using 1000 Genomes allele diversity,Dany Mukesha,,"Non-coding genetic variation contributes substantially to phenotypic diversity and disease susceptibility; however, its regulatory impact remains difficult to characterize at scale. 
MicroRNAs (miRNAs) regulate gene expression by binding short sequence motifs in 3′ untranslated regions (3′-UTRs), and single-nucleotide variants within these motifs can alter binding in an allele-specific manner. Despite their potential relevance, population-scale patterns of miRNA-binding disruption at disease-associated loci remain underexplored. We present a computational framework to systematically identify allele-specific gain and loss of canonical miRNA seed matches within the $APOE$ locus, a region with major relevance for neurodegenerative disease. Using 3′-UTR annotations, curated mature human miRNA sequences, and phased population variant data from the 1000 Genomes Project, we performed exhaustive sequence scanning to compare reference and alternate alleles for miRNA seed complementarity. Variants were classified as predicted gain or loss events using exact k-mer matching, and event frequencies were quantified across global population groups. Across the $APOE$ locus, we identified six variants with available allele frequency data, spanning rare (AF<0.01) to low-frequency variants. Rare variants such as rs370594287 (c.192G>C, p.Gln64His; AF≈0.0004) altered multiple miRNA seed matches, showing potential regulatory consequences. Commoner variants (AF≈0.18) also introduced substantial gain/loss of miRNA interactions. The distribution of predicted events revealed heterogeneous patterns across populations, with rare variants disproportionately affecting miRNA-binding sites, highlighting contributors to post-transcriptional regulatory diversity that have been largely overlooked. This work demonstrates how sequence-level modeling integrated with population genetic data systematically characterize non-coding regulatory variation. The framework is computationally efficient, reproducible, and provides scalable approach for high-throughput annotation of regulatory variation at complex-trait loci." 
+,,poster,Improved analysis of in vivo drug combination experiments with a comprehensive statistical framework and web-tool,"Rafael Romero-Becerra,Zhi Zhao,Daniel Nebdal,Elisabeth Müller,Helga Bergholtz,Jens Henrik Norum,Tero Aittokallio",,"Drug combination therapy is often required to overcome the limited benefits of monotherapy in cancer treatment. While several tools exist for in vitro drug synergy screening and assessment, there is a lack of integrated methods for statistical analysis of in vivo combination experiments. To fill this gap, we present SynergyLMM, a comprehensive modeling and design framework for evaluating drug combination effects in preclinical in vivo studies. Unlike other methods, SynergyLMM accommodates complex experimental designs, including multi-drug combinations, and offers practical options for statistical analysis of both synergy and antagonism through longitudinal drug interaction analysis, including model diagnostics and statistical power analysis. These functionalities allow researchers to optimize study designs and determine an appropriate number of animals and follow-up time points required to achieve sufficient synergy and statistical power. SynergyLMM is implemented as an easy-to-use web-application, making it widely accessible for researchers without programming skills. We demonstrate the versatility and added value of SynergyLMM through its applications to various experimental setups and treatment experiments with chemo-, targeted- and immunotherapy. These case studies showcase its potential to improve robustness, statistical rigor and consistency of preclinical drug combination results, enabling a faster and safer transition from preclinical to clinical testing." 
+,,poster,Statistical Approaches to Assessing Association of Type 2 Diabetes with skin Health status,"Maryam Nasserinejad,Laura Huilaja,Suvi-Päivikki Sinikumpu,Seppo Vainio,Juha Röning,Nsrein Ali,Sylvain Sebert",,"Type 2 diabetes (T2D) is frequently accompanied by comorbidities, yet its relationship with skin disorders remains poorly understood. Identifying key dermatological factors associated with T2D is essential for both clinical insight and reproducible research. In this study, we analyzed data from the Northern Finland Birth Cohort 1966 (NFBC1966), including 1,930 participants (1,036 women, 894 men) at ages 31 and 46. T2D status was defined using oral glucose tolerance tests, self-reports, and Metformin prescriptions, while skin conditions were assessed via dermatologist diagnoses and questionnaires. A total of 47 skin-related variables were evaluated. Variable selection, a crucial step in inferential modeling, was applied to identify factors most consistently associated with T2D. We employed multiple approaches—including test-based, penalty-based, and screening-based methods—to account for variability between methods and improve robustness. Selected variables were further examined using path analysis to estimate direct and indirect effects. Our results indicate that psoriasis (OR 1.115, 95% CI 1.038–1.198, p = 0.033) and Tinea pedis (OR 1.033, 95% CI 1.006–1.059, p = 0.014) have direct associations with T2D, while hyperhidrosis (OR 1.175, 95% CI 1.018–1.394, p = 0.029) exhibits an indirect effect. These findings highlight the interconnectedness of dermatological conditions with T2D and demonstrate the utility of rigorous variable selection in reproducible, methodologically sound analyses. Our approach demonstrates best practices for combining multiple selection methods to identify key influential factors in complex biomedical datasets." +,,poster,The AAtlas - Atlas of the Aorta in Health and Disease,"Jasper Spitzer,Susanne V. 
Schmidt,Maximillian Billmann",,"As the largest blood vessel in the body, the aorta is exposed to strong physical forces and is associated with several severe pathologies such as abdominal aortic aneurysm (AAA) or aortic valve stenosis (AS). These pathologies occur at specific locations, indicating a difference in local environment, not just physical but also reflected in the gene expression profiles of resident cells adapting to these forces. To elucidate the underlying heterogeneity, we performed single cell sequencing of four distinct sections along the mouse aorta, namely the ascending aorta, the aortic arch, the descending thoracic and abdominal aorta. Samples were collected in healthy mice as well as mice with aortic stenosis and abdominal aortic aneurysm after 28 days, profiling more than 380.000 cells from eight animals each for control, AAA and AS. We demonstrate location specific expression profiles in the major resident cell population, both derived from pattern specification through factors like HOX genes, and, more flexible, in response to injury. Having access to such data allows for in-depth profiling of pathway activities and transcriptional programs active along the aorta and if and how these patterns can persist in cases of structural failure. Additionally, resident and invading immune cell populations are profiled, revealing immune-extracellular matrix interactions at the center of the aortic pathologies. While useful on its own, the data set is then combined with other relevant high-quality data sets like the fetal gene expression atlas to generate a gene-gene network of the aorta, allowing for gene (or gene-list) based queries across cell types, locations and disease states." 
+,,poster,"Gene Set Differential Scoring enables reliable, modular and intuitive functional enrichment","Dimitri Meistermann,Morgane Frapin,Claudius F Kratochwil",,"The advent of high-throughput sequencing has led to an unprecedented increase in the quantity of output produced by analyses, such as a list of genes, ranging from dozens to thousands. Overrepresentation analysis (ORA) has emerged to provide a functional means of interpreting these genes by combining them in gene sets associated with specific functions, thereby summarizing these lists by putative biological roles. Functional class scoring (FCS) algorithms, such as gene set enrichment analysis (GSEA), have followed. They have the advantage of conserving a more detailed structure of gene lists by classifying genes according to a score of interest. However, both ORA and FCS approaches have been heavily criticized for their lack of interpretability and limited use in complex datasets. A significant barrier to improving ORA and FCS is the substantial information lost from the count table to the methods' input, e.g., a contingency table or a score vector for FCS. This problem has been solved by single-sample scoring (3S) methods, such as GSVA and PLAGE. Here, I present a novel 3S method, GSDS, which first transforms the input matrix in the context of RNA-seq by converting a count table into a gene set activation score matrix. For each sample and gene set in the provided database (GO, KEGG, etc.), we calculate the first principal component of a PCA performed on the gene set matrix and all samples. We then use this vector as the gene set activation score. Using a PCA to summarize expression within a pathway allows to 1/Obtain an activation score that can describe a complex gene set containing downregulation relationships. 2/ it identifies the genes that contributed the most to computing the activation score. 
In the case of pathways, this allows for an easy interpretation of whether a positive activation score means that the pathway is ""biologically"" on or off. Ultimately, we can perform differential analyses on the activation score matrix to obtain differentially scored gene sets. The activation score matrix can also be reused for downstream analyses, making GSDS a modular approach for specific needs. Plotting the activation scores directly is an intuitive way to visualize the gene set behavior between samples. GSDS is implemented in an R package available on GitHub (https://github.com/DimitriMeistermann/GSDS)." +,,poster,Inference of patient-specific gene regulatory networks via decision path mapping in random forests,"Arindam Ghosh,Teemu Rintala,Vittorio Fortino",,"Traditional gene regulatory network inference methods from transcriptomic data typically yields an aggregate network that obscures any inter-patient heterogeneity, which is a critical barrier in precision oncology. While patient-specific network construction strategies exist, they work by identifying network information gain or loss between a reference and perturbed network, highlighting the differential gene pair interactions for each sample. Here, we present a novel approach that directly infers patient-level regulatory networks from population transcriptomic data using a machine learning-based approach. Our method, inspired from the tool GENIE3, is based on the assumption that gene regulation can be inferred from predictive relationships between genes. We train a series of Random Forest regressor models to predict the expression of each target gene using the expression of given regulator genes. By tracing the sequence of genes in each patient’s decision path in the trees of the forest, we derive a quantitative measurement of the importance of the regulator genes for that specific patient. 
This importance score infers the strength of the network edge connecting the said regulator to the target in the patient. In a pilot study, we observed that features derived from patient-specific networks created from TCGA breast cancer data were informative for classifying patient sub-types. We believe our method would open new avenues for the characterization of patient-specific cancer driver genes, identification of novel targets, drug repurposing and drug response prediction. Moreover, the proposed framework is directly aligned with the Bioconductor ecosystem, as it builds on transcriptomic data structures, integrates naturally with existing gene regulatory network and machine learning workflows, and could be implemented as a Bioconductor package to enable patient-level network inference for the broader genomics community." +,,poster,Transcriptomic Analysis and Heatmap Generation with DgeaHeatmap,"Leonie Johanna Lancelle,Phani Sankar Potru,Björn Spittau,Susanne Wiemann",,"While it becomes easier to generate huge amounts of data, there is a discrepancy between available tools and researchers’ bioinformatical expertise to analyze their data. For this purpose, we created DgeaHeatmap, an R package that makes complicated steps of transcriptomic data analysis more user-friendly. DgeaHeatmap stands for “Differential Gene Expression Analysis and Heatmaps” and consists of functions that are divided into three parts. There are functions associated with heatmap building, functions for spatial transcriptomics data extraction from Nanostring GeoMx DSP, and functions corresponding to differential expression analysis (DEA). The functions related to heatmap building allow the user to prepare normalized or raw count data to generate heatmaps. These include functions to build and prepare matrices, allows extraction of specific samples or columns of big data sets, to scale the data by Z-score and to show data distribution. 
Furthermore, there are functions included to perform K-means clustering and hierarchical clustering, to set annotation of samples and genes (or columns and rows), as well as to cluster samples and genes (columns and rows) in the heatmap. Finally, there are functions for more and less advanced heatmap generation included, to build highly customizable heatmaps to the user’s taste. Functions associated to Nanostring GeoMx DSP, a method for multiplexed spatial transcriptomics, allow the data manipulation of raw data generated through GeoMx DSP to extract raw read counts in order to conduct downstream data analysis. The functions of differential expression analysis allow the user to perform DEA based on either limma, DESeq2, or edgeR. Further, functions to extract the differentially expressed genes and to perform pairwise contrasts are supplied. Overall, DgeaHeatmap provides a user-friendly and server-independent approach to analyze transcriptomic data and to create heatmaps with easy customization and optional automatic annotation. The package is also easy to use for less bioinformatically experienced scientists." +,,poster,"Association of D516V, H526Y, and S531L rpoB gene polymorphisms and risk factors with rifampicin resistance in Mycobacterium tuberculosis isolates from pulmonary TB patients in Northwest Amhara, Ethiopia: cross-sectional study","Kinfe Getachew,Nega Berhane,Aynias Seid",,"Abstract Background Drug-resistant tuberculosis (DR-TB) continues to pose a threat to public health worldwide. Rifampicin (RIF) resistance is mostly caused by mutations in the rpoB gene, which codes for the β -subunit of RNA polymerase and is also an important surrogate marker for multidrug-resistant tuberculosis (MDR). Objective This study aimed to detect the rpoB gene mutations associated with RIF resistance and identify the risk factors for MDR/ RIF resistance patterns in individuals infected with pulmonary TB. 
Methods A facility-based cross-sectional study was conducted at selected TB treatment center hospitals (Felegehiwot, Debre-tabor, University of Gondar, Debark, and Metema hospitals) from June to December 2023 in the Northwestern Amhara regional state of Ethiopia. A total of 206 pulmonary TB patient’s sputum samples were included. The study participants’ Socio-demographics and clinical and behavioral characteristics were collected through semi-structured questionnaires. Then all GeneXpert® MTB/RI-positive sputum specimens of bacterial isolates were culturedin a conventional egg-based solid Lowenstein-Jensen (LJ) medium. MTB Genomic DNA was extracted using GenoLyze Kit. The allele-specific Amplification Refractory Mutation System Polymerase Chain Reaction (ARMS PCR) approach was employed on whole DNA samples from 206 Culture positive isolates using three distinct codon-specific primers (D516V, H526Y, and S531L). Results An isolate is classified as RR-TB if it carries any mutation in the rpoB gene. Most Single nucleotide polymorphism (SNP) mutations were observed on rpoB S531L 19 (9.2%). Of 206 confirmed clinical isolates, 21 (10.2%) were RIF Resistant, while the remaining 185 (89.8%) were RIF susceptible. Before TB treatment history (AOR = 4.27, CI 1.29–14.20, p = 0.02), and Window opening practice of patients (AOR = 6.17, CI 1.22–31.29, p = 0.03) were significantly associated with RR-TB development. Conclusion The prevalence of RR (RIF Resistant) -TB among TB-confirmed cases was 21 (10.2%). This implies that RR-TB is a serious health problem in the study population. The S531L was the most common mutation conferring resistance to RIF." +,,poster,Altered Gut Microbiome Composition of Streptococcaceae and Lachnospiraceae in Multiple Sclerosis Patients,Amaan Arif,,"$Background$: Multiple Sclerosis (MS) is a chronic autoimmune disease that is characterized by inflammation and demyelination in central nervous system (CNS). 
MS has significantly impacted public health due to its high prevalence and the lifelong severe symptoms it causes, including fatigue, mobility issues, cognitive impairment, and visual problems.
Given the critical role it serves for downstream data interpretations, various methods for detecting spatially variable genes (SVGs) have been proposed. However, the lack of benchmarking complicates the selection of a suitable method. The identification of genes that vary across spatial domains in tissues and cells is an essential step for spatial transcriptomics data analysis. Given the critical role it serves for downstream data interpretations, various methods for detecting spatially variable genes (SVGs) have been proposed. However, the lack of benchmarking complicates the selection of a suitable method. Our study evaluates the performance of each method from multiple aspects and highlights the discrepancy among different methods when calling statistically significant SVGs across diverse datasets. Overall, our work provides useful considerations for choosing methods for identifying SVGs and serves as a key reference for the future development of related methods." +,,poster,Spatial point process approaches to high-resolution spatially resolved omics and cytopathology data,"Valerio Reffo,Ilaria Billato,Davide Risso",,"Advances in spatial transcriptomics and proteomics technologies now enable the analysis of an increasing number of molecules with sub-cellular spatial resolution. Elucidating tissue organization at the molecular and cellular levels is of critical interest. In particular, tissue organization, both physiological and pathological, can be understood by studying how interactions between cells and its components happen in tissue space. However, characterizing tissue patterns remains challenging due to the high density of cells, the diversity of cell types, and the resulting complexity of intra- and inter-cellular communication. To address these challenges, we present an analysis framework that leverages spatial point process methodology directly on cell or nucleus coordinates, treating each observation as a point annotated by cell identity. 
Our study focuses on leveraging the functional form of the second-moment statistics of multitype point processes to capture and characterize spatial associations among cells or nuclei. Our results highlight both the opportunities and complexities of applying point process statistics to spatial transcriptomics. Specifically, we encountered challenges related to computational scalability, the need for careful spatial window selection to ensure stationarity, and the limitations of functional data smoothing for regular transcript distributions. Nonetheless, point process methods were proven valuable in uncovering biologically relevant spatial cell clusters and offer promise for future integrative analyses." +,,poster,Automatic recognition of combined annotated and novel reference transcripts with oarfish and tximeta,Michael I Love,,"The oarfish (Zare Jousheghani et al. 2025) long read quantification tool allows specifying distinct _annotated_ reference transcripts (e.g. GENCODE, Ensembl) and _novel_ reference transcripts (e.g. _de novo_ assemblies) which are combined together as the index for alignment and quantification. With Bioconductor release 3.22, the _tximeta_ package introduces a new workflow for importing quantification data and linking data to transcript metadata, designed for this mixed reference transcript situation. Provenance of reference transcripts is automatically detected via transcript sequence digests. The new workflow steps are: `importData`, `inspectDigests`, and `updateMetadata` (more details in the ""mixed reference transcripts"" section of the _tximeta_ vignette). At EuroBioc, we will seek user feedback on this new long read workflow and example datasets for further testing and feature development." 
+,,poster,Exploring quantile binning for microbiome machine learning,"Rasmus Hindström,Juho Pelto,Tuomas Borman,Leo M Lahti",,"Effective Machine Learning (ML) on microbiome data often requires transformations that address extreme sparsity and compositional constraints. While relative abundance and centered log-ratio (CLR) transformations are standard, recent findings suggest that simple presence/absence encodings perform exceedingly well while remaining highly interpretable (Karwowska et al., 2025). However, binary encoding discards potentially valuable quantitative information. Inspired by the quantile binning strategy employed in the recent transformer-based foundation model BiomeGPT (Medearis et al., 2026), we have implemented a discretization transformation function within the mia package. By retaining abundance hierarchies while mitigating noise, binning preserves information better than binary methods while offering similar mitigation of noise in raw abundance data. In this work, we demonstrate mia's data wrangling capabilities and benchmark transformations for ML methods, specifically comparing quantile binning against robust centered-log-ratio (rCLR), presence/absence, and relative abundance transformations. We explore how these distinct philosophies affect feature space structure and downstream ML model performance. Our preliminary results indicate that quantile binning can offer slightly improved performance particularly in model sensitivity and accuracy, while maintaining clear interpretation as with presence/absence. We advocate for the inclusion of discretization methods in standard microbiome preprocessing toolkits as a robust strategy for handling noisy, high-dimensional, and compositional data across diverse modeling applications. References: 1. Karwowska, Z., Aasmets, O., Metspalu, M., Metspalu, A., Milani, L., Esko, T., Kosciolek, T., Org, E., & Estonian Biobank research team. (2025). 
Effects of data transformation and model selection on feature importance in microbiome classification data. Microbiome, 13(1), 2. https://doi.org/10.1186/s40168-024-01996-6​ 2. Medearis, N. A., Zhu, S., & Zomorrodi, A. R. (2026). BiomeGPT: A foundation model for the human gut microbiome (p. 2026.01.05.697599). bioRxiv. https://doi.org/10.64898/2026.01.05.697599" +,,poster,Global antimicrobial resistance patterns in human gut metagenomes are structured along socio-economic gradients,"Mahkameh Salehi,Shivang Bhanushali,Aura Raulo,Guilhelm Sommeria-Klein,Peter Collingon,John J. Beggs,Katariina Pärnänen,Leo M Lahti,Eetu Tammi,johan Bengtsson-Palme",,"Global antimicrobial resistance patterns in human gut metagenomes are structured along socio-economic gradients Mahkameh Salehi¹˒², Shivang Bhanushali¹˒², Eetu Tammi¹, Aura Raulo³, Guilhelm Sommeria-Klein⁴, Peter Collingon⁵˒⁶, John J. Beggs⁷, Johan Bengtsson-Palme⁸˒⁹˒¹⁰, Leo Lahti¹*, Katariina Pärnänen¹˒²* *contributed equally ¹ Dept. of Computing, Univ. of Turku, Finland, ² Dept. of Microbiology, Univ. of Helsinki, Finland, ³ Dept. of Biology, Univ. of Oxford, UK, ⁴ Inria, Univ. Bordeaux, INRAE, France, ⁵ ACT Pathology, Canberra Hospital, Australia, ⁶ Medical School, Australian National Univ., Australia, ⁷ Independent researcher, Melbourne, Australia, ⁸ Dept. of Life Sciences, Chalmers Univ. of Technology, Sweden, ⁹ Centre for Antibiotic Resistance Research (CARe), Sweden, ¹⁰ Sahlgrenska Academy, Univ. of Gothenburg, Sweden Antimicrobial resistance (AMR) is a major global health challenge. Yet current surveillance relies mainly on clinical isolates and provides limited insight into resistance patterns in the general population. Metagenomic sequencing of the human gut microbiome enables population-level assessment of antibiotic resistance genes (ARGs) across both pathogenic and commensal bacteria. 
However, analysing such large, heterogeneous datasets requires scalable, well-structured, and producible computational frameworks. We analysed more than 58,000 publicly available human gut metagenomes from sequence databases NCBI and ENA, covering multiple continents and income groups. All analyses were conducted in R using Bioconductor data structures, primarily TreeSummarizedExperiment, to integrate resistome profiles, microbiome composition, and country-level metadata within a unified framework. Microbiome and resistome analyses were implemented using Bioconductor-compatible workflows, including the mia ecosystem, enabling consistent handling of multi-assay metagenomic data at scale. ARG load and diversity were significantly higher in low- and middle-income countries compared with high-income countries. Several socio-economic predictors showed context-dependent or reversed associations across income groups. At the country level, ARG composition was strongly associated with socio-economic similarity between countries and closely mirrored gut microbiome composition. Mediation analyses indicated that associations between socio-economic factors and ARG patterns were largely explained by differences in microbiome composition. Overall, this study demonstrates how Bioconductor data containers enable the joint analysis of resistome profiles, microbiome composition, and country-level metadata across tens of thousands of samples." +,,poster,Integrating microbiome responses across warming experiments in coastal marshes,"Johanna Schwarzer,Alexander Bartholomäus,Ella Lu Logemann,Susanne Liebner,Julian Mittmann Goetsch,Simon Thomsen,J. Patrick Megonigal,Roy Rich,Genevieve Noyce,Peter Mueller",,"Coastal marshes are effective carbon sinks, because soil microbial activity is often low under flooded oxygen-deficient conditions. 
Previous ex-situ warming experiments at the Climate Change Marsh Mesocosm Facility (CCMMF) in Hamburg, Germany, showed that warming can alter soil microbial communities, but responses vary with environmental context. Building on these recent findings, we will expand our analysis by microbial 16S rRNA gene sequencing data sets from two in-situ coastal marsh warming experiments: MERIT (“Marsh Ecosystem Response to Increased Temperatures”) in northern Germany and SmartX (“Salt Marsh Accretion Response to Temperature eXperiment”) in a brackish marsh on Chesapeake Bay, USA. By this we combine genetic microbial data of coastal marshes from different ecosystem age, vegetation type, and tidal regime. Using multivariate analysis that accounts for the compositional structure of sequencing data, we will assess whether warming-induced microbial shifts are consistent across coastal ecosystems or instead shaped by local conditions. We employ the microViz platform (including phyloseq, vegan and microbiome) to explore and visualize our datasets. Differential abundance is assessed with DESeq2 and ALDEx2. To test the hypothesis that warming alters community structure, we want to apply radEMU. Based on previous results, we expect microbial responses to experimental warming to vary strongly with vegetation composition and ecosystem age. With this meta study we aim to identify key factors controlling microbial responses to increased temperatures to better understand how climate change reshapes microbial composition and thereby carbon dynamics in coastal wetlands." +,,poster,Mapping the resistome in global human and poultry gut metagenomes,"Shivang Bhanushali,Johanna Muurinen,Katariina Parnanen,Leo M Lahti",,"Introduction: Poultry farms are major reservoirs of antimicrobial resistance (AMR), but their role in the spillover of antibiotic resistance genes (ARGs) to humans remains poorly understood. 
This study aimed to determine how host identity and geography shape bacterial and ARG diversity, and whether ARGs cross host barriers more readily than bacterial species. Methods: Shotgun metagenomes from chicken hindgut (n = 1,666) and human stool (n = 2,410) available from European Nucleotide Archive across 10 countries were analysed using MetaPhlAn4, and mapped against the ResFinder ARG database. The outputs and the relevant metadata were containerised in TreeSummarizedExperiment objects and data analysis was performed using the mia microbiome framework. Microbiome and resistome dissimilarities were quantified using Aitchison distances. Dispersion was evaluated via ordination and permutation-based analyses. A hierarchical pairwise effect-size analysis using Rank-Biserial correlation and Probability of Superiority (PS) across pairs was conducted to determine the relative associations of host identity and geography with ARGs and species. Results: Tetracycline resistance genes were the most prevalent resistance determinants in both hosts, although human and poultry samples exhibited distinct AMR signatures. ARG diversity and load were higher in poultry than in humans. Host was the primary driver of dissimilarity, with a stronger effect on bacterial species (mean R2 22.1%) than on the resistome (mean R2 15.6%). Pairwise effect-size analysis indicated that resistomes crossed host and geographic barriers more frequently than species did. Conclusion: Poultry gut ARGs exhibit high cross-host and cross-geographic mobility, highlighting poultry as a global reservoir of resistance." +,,poster,Trajectory-based modelling of gut microbiome development and psychosocial outcomes in childhood,"Laura Perasto,Jenni Korteniemi,Thomaz F.S. 
Bastiaanssen,Minna Lukkarinen,Hasse Karlsson,Nitin Bayal,Santosh Lamichhane,Heidi Isokääntä,Matilda Kråkström,Eveliina Munukka,Matej Oresic,Alex M Dickens,Akie Yada,Elisabeth Nordenswan,Riikka Korja,Leo M Lahti,Anna Aatsinki,Linnea Karlsson",,"$$Background$$ Human microbiome research examines microorganisms living in the body, particularly in the gut, and their associations with health and disease outcomes. Despite growing interest, there is no consensus on appropriate statistical methods due to the high dimensionality, sparsity, and compositional nature of microbiome data. Studying mental health and its development in the context of microbiome research is essential as it is known that the gut-brain axis is an important bidirectional communication network. Altered gut microbiota has been linked to psychiatric conditions such as depression, autism, and ADHD. Longitudinal data are essential for capturing temporal dynamics and developmental trajectories. It also presents additional challenges, including missing data, irregular sampling, and strong within-subject correlations. Careful statistical modeling is therefore critical. Data science ecosystems, such as Bioconductor, play a central role in addressing these challenges by providing standardized data structures, validated statistical methods, and reproducible analytical workflows. $$Objective$$ To study the association between gut microbiome trajectories and psychosocial development of children in the first 10 years of life, while applying reproducible statistical workflows for complex longitudinal microbiome data. $$Methods$$ Fecal samples collected from the FinnBrain Birth Cohort study are used from multiple timepoints during childhood with maximum sample size of 886. Samples have been analysed using metagenomic sequencing and for multiple metabolites, for example conjugated bile acids. Psychosocial development has been assessed using questionnaires. 
Data has been formatted as a TreeSummarizedExperiment object
In addition, we evaluated the impact of CNA inference on downstream analyses, particularly the quantification of CNA signatures [6] from the inferred profiles. These signatures, originally defined using DNA sequencing data, summarize recurrent copy number patterns reflecting underlying chromosomal instability processes and can predict response to platinum-based chemotherapy. Signatures were quantified from both scWGS-derived and scRNA-seq-inferred CNA profiles, and used to predict therapy response, using the first ones as a ground truth. Results Preliminary results on the astrocytoma dataset indicate that numbat achieves the highest overall accuracy, with a median agreement of approximately 90% across cells. Our preliminary analyses further suggest limitations in directly quantifying CNA signatures from scRNA-seq-inferred CNA profiles. In particular, therapy response predictions derived from these signatures did not consistently match those obtained from scWGS data, revealing possible limitations in signatures' current applicability to scRNA-seq-based CNA inference. Analyses on the colorectal cancer cohort are ongoing and will allow evaluation across multiple patients and tumor contexts, providing a more robust assessment of tool performance. Conclusion This benchmark provides practical guidance for researchers performing CNA inference from scRNA-seq data, and highlights the impact of inference accuracy on CNA signature quantification and therapy response prediction. These findings motivate the need for the development of signatures specifically tailored to scRNA-seq data. References [1] Tickle, T. et al. inferCNV of the Trinity CTAT Project. Klarman Cell Observatory, Broad Institute of MIT and Harvard (2019). https://github.com/broadinstitute/inferCNV [2] De Falco, A. et al. A variational algorithm to detect the clonal copy number substructure of tumors from scRNA-seq data. Nat Commun 14, 1074 (2023) https://doi.org/10.1038/s41467-023-36790-9 [3] Gao, T. et al. 
Haplotype-aware analysis of somatic copy number variations from single-cell transcriptomes. Nat Biotechnol 41, 417–426 (2023) https://doi.org/10.1038/s41587-022-01468-y [4] Yu, L. et al. ,scONE-seq: A single-cell multi-omics method enables simultaneous dissection of phenotype and genotype heterogeneity from frozen tumors. Sci. Adv. 9, eabp8901(2023) https://doi.org/10.1126/sciadv.abp8901 [5] Zhou, Y. et al. Single-Cell Multiomics Sequencing Reveals Prevalent Genomic Alterations in Tumor Stromal Cells of Human Colorectal Cancer. Cancer Cell 38(6), 818 - 828.e5 (2020) https://doi.org/10.1016/j.ccell.2020.09.015 [6] Drews, R.M. et al. A pan-cancer compendium of chromosomal instability. Nature 606, 976–983 (2022) https://doi.org/10.1038/s41586-022-04789-9" +,,poster,paRDal: Parallel RNA/DNA Analytical Workflow,"Kateřina Matějková,Petr Nehasil,Zdeněk Kleibl,Petra Kleiblová",,"Next-generation sequencing (NGS) of germline DNA is the clinical gold standard for hereditary cancer diagnostics; however, its diagnostic utility is compromised by the high prevalence of variants of uncertain significance (VUS) which are detected in about 40% of tested individuals. Many VUS affect mRNA processing, leading to aberrant splicing or nonsense-mediated decay (NMD), effects that cannot be determined from DNA NGS alone. While RNA sequencing (RNA-seq) provides the functional evidence required for variant reclassification under ACMG/AMP guidelines (Kleiblova et al. 2024; PMID:38830124), its clinical implementation is often constrained by a 'technical bottleneck': fragmented workflows generate complex, multilayered data that remain inaccessible to clinicians without specialized bioinformatic expertise. To address this challenge, we present paRDal (parallel RNA/DNA analytical workflow), a fully containerized, modular pipeline designed for the systematic integration of matched DNA and RNA NGS data. 
Validated across diverse datasets, paRDal unifies DNA and RNA variant calling and annotation with advanced transcriptomic analyses, including allele-aware alignment using STAR+WASP, expression outlier detection via OUTRIDER, and aberrant splicing identification using FRASER. A key feature of the framework is its comprehensive variant annotation engine (VEP), which integrates deleteriousness scores, predicted splicing effects, clinical significance, and population frequencies with directly observed functional evidence. To support efficient clinical interpretation, we developed an interactive Shiny application that consolidates these multidimensional data streams on a per-patient basis. By linking variant effect predictions with observed transcriptomic consequence, paRDal enables high-confidence resolution of VUS through a user-friendly interface, supporting variant classification in germline genetic diagnostics." +,,poster,"Integration of transcriptomics, proteomics and miRNA cancer data using the Omics Playground Multi-Omics Platform.","Antonino Zito,Ivo Kwee",,"Multi-omics data integration combines multiple layers of biological information -including genomics, epigenomics, transcriptomics, proteomics, and metabolomics- to uncover relationships that are not detectable when each layer is analyzed independently. Growing evidence indicates that complex phenotypes, including multifactorial diseases, arise from coordinated alterations across molecular layers, making integrated analyses essential. Multi-omics data integration has thus become central to precision medicine, enabling the association of molecular profiles with patient-specific clinical outcomes. We have recently enhanced the Omics Playground Platform -which remains open-source for Academics- with a powerful multi-omics analysis module. The multi-omics module is an integrated bioinformatics framework that combines state-of-the-art statistical and machine learning methods with advanced visualization. 
This new module implements: (i) Similarity Network Fusion for cross-omics sample clustering; (ii) supervised and unsupervised multi-omics factorization to identify phenotype-associated factors; (iii) multi-omics gene set enrichment analysis; (iv) multi-omics WGCNA to identify correlated feature modules; and (v) deep-learning approaches, including semi-supervised auto-encoders and multilayer perceptrons, to infer biomarkers and their importance. We’ve also developed LASAGNA, an in-house framework for integrated genomic and network analysis, enabling interactive multilayer network visualization and correlation analysis. We evaluated the framework using a publicly available breast cancer dataset of 150 samples concurrently profiled for transcriptomics, proteomics, and miRNAs. Multi-partite LASAGNA analysis revealed previously unknown correlation patterns between multi-omics data. These involved negative correlation between miRNA-455 and the HER2 protein, and positive correlation between the proteins Cyclin B1 and Cyclin E1 and miRNA-20. MOFA analysis showed that Factor 1 captured most data heterogeneity, driven primarily by transcriptomics, followed by miRNAs and proteomics. Factor 1 was strongly associated with the basal subtype, while Factor 5 was specifically correlated with HER2 positivity. Notably, Factor 5 included both the main HER2 protein isoform and the phosphorylated isoform HER2_pY1248, supporting its functional specificity. Enrichment analysis highlighted pathways related to NSCLC, estradiol, and prolactin regulation. Deep-learning analysis identified potential novel biomarkers, including Caveolin and Fibronectin proteins, the genes NTN4 and TTC39A, and miRNA-30a. Altogether, this work demonstrates the ability of the Omics Playground platform to perform comprehensive multi-omics analyses and extract meaningful biological insights, including the identification of novel candidate biomarkers from multimodal data." 
+,,poster,DNA Methylation-Based Biomarkers analyses: From Epigenetic Clocks to Cross-Platform Harmonization,Chaini Konwar,,"DNA methylation (DNAme)-based biomarkers such as epigenetic clocks are increasingly used in human population research. Epigenetic clocks are recognized as robust biomarkers of aging, and the difference between an individual’s chronological age and their DNAme-predicted epigenetic age reflects the rate of biological aging. Accelerated epigenetic aging has been consistently linked to adverse health conditions and increased all-cause mortality risk. However, with the transition from Illumina EPIC v1 to v2 arrays, researchers face critical analytical challenges as most epigenetic clocks were trained on previous generations of Illumina microarrays, and not all predictive CpGs are retained on EPIC v2 platform. The proposed hands-on workshop will provide a step-by-step, comprehensive, reproducible framework to conduct epigenetic clock investigations in the R computing environment, with particular emphasis on understanding and mitigating platform-specific differences in biomarker estimates measured between EPIC v1 and v2 arrays. The first half of the workshop will be geared towards establishing an Epigenetic Clock Biomarker Analysis Pipeline, including: • Epigenetic Clock Estimation (in adults), using first-generation (Horvath, Hannum), second-generation (PhenoAge, GrimAge), and third-generation clocks (DunedinPACE) • Calculation of epigenetic Age Acceleration (EAA) Metrics, including residual-based and difference-based approaches • Covariate Adjustment with a focus on cell type composition and other biological and technical covariates • Effect Size calculation and interpretation Additional topics include brief introductions to pediatric epigenetic clocks (PedBE) and DNAme-based Inflammation Biomarkers (IL-6 and CRP). 
The second half of the workshop will address the challenges of integrating EPICv1 and v2 data, with a focus on strategies to ensure biomarker estimates across platforms/versions are comparable. This is specifically relevant in the context of meta-analyses and consortium studies where combining datasets from different array versions is a necessity. Additionally, in the context of a longitudinal cohort design where samples from different timepoint are quantified on different EPIC versions, creating an inevitable confounding between platform and time which makes batch correction inappropriate. All analyses will be conducted using publicly available data and using shared, fully-documented, reproducible R scripts. The workshop will cover data preprocessing, biomarker calculation, statistical modelling, and participants will be familiarized to R packages such ggplot2 for easy publication-quality visualization." +,,poster,Knowledge-based Integration of Multi-Omic Analysis with Anansi: Annotation based Analysis of Specific Interactions.,"Thomaz F.S. Bastiaanssen,Giulio Benedetti,Benjamín Valderrama,Tuomas Borman,Thomas P Quinn,John F. Cryan,Leo M Lahti",,"Motivation: Studies including more than one type of 'omics data sets are becoming more prevalent. Integrating these data sets can be a way to solidify findings and even to make new discoveries. However, common multi-table integration methods produce results that are hard to interpret. For instance, all-vs-all correlation analysis, where each feature of the first data set is correlated to each feature of the second data set, produces unstructured results that are hard to interpret, and involves potentially unnecessary hypothesis testing that reduces statistical power due to false discovery rate (FDR) adjustment. Implementation: We present the anansi R package, available on Bioconductor, as a way to improve upon all-vs-all association analysis. 
We take a knowledge-based approach where external databases like KEGG are used to constrain the all-vs-all association hypothesis space, only considering pairwise associations that are a priori known to occur. This produces structured results that are easier to interpret, and increases statistical power by skipping unnecessary hypothesis tests. Anansi is compatible with the Bioconductor ecosystem through the MultiAssayExperiment and SummarizedExperiment family of classes. Results: We demonstrate anansi by analysing metabolite-function interactions in the context of joint host-microbial co-metabolism. We further demonstrate how our framework enables more analyses beyond pairwise association testing, including differential association testing, and show how anansi can be used to identify associations that differ in strength or degree based on sample covariates such as case/control status. Anansi is not limited to biological measurement data and supports user-defined databases." +,,poster,Enriching your Biological ‘Omics Analysis using Biological Knowledge Databases with Ariadne,"Thomaz F.S. Bastiaanssen,Giulio Benedetti,Eugenia E. Natasha,Tuomas Borman,Leo M Lahti",,"With the continuous advancement of high-throughput techniques in the biological sciences, large multi-modal datasets are becoming increasingly prevalent. In tandem, the research community has developed extensive and interconnected knowledge databases capturing known relations within and across data modalities. Leveraging the knowledge in the analysis can imrpove robustness and interpretation of results. However, such relational information between data modalities remains unused unless it is organised into a searchable format and made publicly accessible through online databases, thereby transforming untidy data into new usable knowledge. 
Although this knowledge is widely available across numerous public databases, it is often difficult to access and even more challenging to integrate into a typical omic data analysis workflow. In this workshop, we will discuss strategies to incorporate information from large databases such as KEGG, UniProt and ENZYME (ec). Using an example gut microbiome study with both metagenomic and metabolomic measurements, we demonstrate how ariadne, an R package we developed, can be used to conveniently retrieve, wrangle and integrate across datasets. We further demonstrate how this strategy can be used to leverage domain expertise, in this case microbial ecology, to ask more precise questions of your data. The aim of this workshop is to provide a simple and flexible flexible and flexible and way to integrate biological knowledge into new and existing analytical routines." +,,poster,glycomix : a package for pre-processing and extracting biological signal from CE-LIF glycomics data,"Madeline Vast,Bradley Turner,Kelsey Morgan Wheeler,Olga Sokolovskaya,Anastasiya Grebin,Louis-David Piron,Douglas Kwon,Katharina Ribbeck,Laura Symul",,"Mucus and mucin-linked glycans play central roles in human health, notably by mediating the interactions between microbiomes and their host. Further, glycosylation is one of the most common, yet understudied, post-translational protein modifications which affects the physical and biochemical properties of glycoproteins. In glycomics studies, glycans are typically analysed after separation from their protein. Traditionally, released glycans have been analysed using mass spectrometry-based methods. However, these methods are costly and have a low throughput. Capillary electrophoresis with laser induced fluorescence (CE-LIF) exhibits comparable high sensitivity to traditional methods in the field but at a reduced cost, enabling its use in larger-scale studies. 
Here, we propose a method to extract peak intensity values from raw CE-LIF traces to quantify biological differences between samples. Raw CE-LIF traces exhibit several technical artefacts such as batch- and sample-specific intensity and temporal drifts, and spikes, which need to be accounted for in analyses. Our approach is to model the observed signal as a temporal monotonic transformation of a mixture of glycan-derived peaks, expected technical peaks from labelling agents and spiked-in reference compounds, and batch-specific nuisance signals. We assume peaks can be modelled as parametric curves with shape parameters that are shared across samples. Priors for these parameters for biological and technical peaks are described in a peak catalogue along with functional annotations to facilitate interpretation of newly generated results. Each step of the proposed workflow for pre-processing and extracting biological signals to convert raw signals contained within *.CDF* files into tables of feature intensities (*SummarizedExperiment*) is implemented as an independent function of the *glycomix* R package. Applied to data obtained from vaginal mucus samples, our analyses demonstrated differences in glycan composition of participants with a healthy (*Lactobacillus*-dominated) vs. dysbiotic (composed of diverse anaerobes) vaginal microbiota." +,,poster,absuite: An R/Bioconductor Package for Antibody Repertoire Profiling and Clonotype Analysis,"Favour Igwezeke,Olaitan I. Awe",,"Abstract: B-cell receptor (BCR) repertoire sequencing enables detailed characterization of antibody responses in infection, vaccination, and autoimmunity. However, comprehensive repertoire analysis typically requires integrating multiple specialized tools, limiting accessibility for experimental immunologists. We present absuite, an R/Bioconductor package providing an integrated workflow for BCR repertoire analysis. 
The package implements ten core analytical functions: V-gene usage profiling, convergent clonotype identification across samples, clonality quantification via Shannon entropy and normalized entropy metrics, isotype class-switching analysis, diversity calculations (richness, evenness), clonotype similarity networks with community detection, and repertoire comparison with statistical testing. absuite accepts data from major repositories (Observed Antibody Space, IMGT/HighV-QUEST) and standard formats (AIRR-compliant JSON, CSV), handling preprocessing automatically to allow researchers to focus on biological interpretation. We validated absuite using 187,342 antibody sequences from three published COVID-19 cohorts. V-gene analysis correctly identified established COVID-19 signatures (IGHV3-30, IGHV4-34 enrichment). Convergent analysis detected 57 public clonotypes shared across multiple patients, consistent with published findings. Network analysis using Louvain clustering revealed 447 communities with high modularity (0.86), exposing clonotype relationships and somatic variant lineages. Clonality metrics distinguished expanded clonal responses from diverse repertoires, with values ranging from 0.001 to 0.11 across samples. absuite consolidates BCR repertoire analysis into a single R package, eliminating the need for multiple tools and custom scripting. By providing an accessible, well-documented workflow, absuite enables experimental immunologists to perform sophisticated repertoire analyses independently. The absuite package is currently under development and will soon be submitted to Bioconductor." +,,poster,Metabolic modeling reveals microbial metabolites associated with infant temperament traits,"Abhijit Paul,Anna-Katariina Aatsinki,Matilda Kråkström,Minna Lukkarinen,Eveliina Munukka,Venla Huovinen,Saara Nolvi,Eeva-Leena Kataja,Riikka Korja,Nitin Bayal,Hasse Karlsson,Leo M Lahti,Linnea Karlsson,Santosh Lamichhane,Alex M. 
Dickens,Matej Orešič",,"Background: Early temperament traits are important predictors of later neurodevelopment and mental health outcomes. Emerging evidence links the infant gut microbiota to early temperament, yet the microbial functional mechanisms underlying these associations remain largely unexplored. Here we examine whether microbial metabolites at 2.5 months of age are associated with temperament traits at 6 months, as these traits may serve as early intermediate phenotypes of later child psychiatric disorders. Methods: Stool samples from 2.5-month-old infants (n = 207) in the FinnBrain Birth Cohort were sequenced using shotgun metagenomics. Species-level taxonomic profiles were generated with nf-core/taxprofiler. Individualized microbial community metabolic models were constructed using the Microbiome Modelling Toolbox (MMT) to estimate metabolite secretion potentials. We examined associations between species-level taxonomic composition, predicted microbial metabolite secretion potentials, and maternal reports on the Infant Behavior Questionnaire–Revised (IBQ-R) at 6 months of age. Model predictions were compared with fecal metabolomics data. Analyses were performed in R using Bioconductor’s mia framework with TreeSummarizedExperiment and MultiAssayExperiment data containers and scater for quality control and visualization, and tidyverse-based tools for data wrangling and visualization. Results: Community metabolic modeling revealed that bile acids, short-chain fatty acids, amino acids, vitamins, and glycans were associated with temperament traits. Taurine- and glycine-conjugated bile acids were positively associated with negative emotionality, while microbially conjugated bile acids with amino acids other than taurine and glycine were positively associated with surgency. Short-chain fatty acids, particularly butyrate and isobutyrate, were negatively associated with fear reactivity. 
Conclusions: By integrating shotgun metagenomics with community metabolic modeling, we identified associations between microbial bile acid and short-chain fatty acid metabolism pathways and infant temperament, supported by stool metabolomics, and we highlighted putative microbial contributors. These findings extend beyond taxonomic associations to highlight potential microbial functional contributions to early behavioral phenotypes. While powerful, end-to-end integration currently depends on Python- and MATLAB-based toolboxes; expanding native R/Bioconductor support could facilitate broader adoption." +,,poster,Comparative Analysis of Statistical Learning Algorithms for Microbiome Pathway Detection in Ischemic Heart Disease,"Eliana Ibrahimi,Muhammad Yaqoob,Ana Menkshi,Fatjona Murrani,Alise Ponsero",,"While the gut microbiome is known to influence cardiovascular health via metabolic modulation, comprehensive pathway-level characterization using machine learning approaches remains limited. We used data from the Metacardis cohort (metacardis.net), comprising 275 metabolically healthy controls (HC) and 372 patients with established Ischemic Heart Disease (IHD), to identify microbial pathways associated with cardiovascular pathology. Microbiome functional profiles were generated using shotgun metagenomic sequencing data processed via the MOCAT2 pipeline. High-quality reads were mapped to the Integrated Gene Catalog (IGC) of the human gut microbiome. Functional annotation was performed using the KEGG (Kyoto Encyclopedia of Genes and Genomes) database. Using metagenomic functional profiles, we systematically evaluated six machine learning algorithms—including Random Forest, Support Vector Machine, Linear Discriminant Analysis, Logistic Regression, XGBoost, and Artificial Neural Networks—to classify IHD cases against controls. We employed 5-fold cross-validation for hyperparameter tuning, with final performance reported on an independent validation test set. 
Additionally, SHAP (SHapley Additive exPlanations) analysis was applied to interpret model outputs and isolate key discriminative features. Our results indicate that ANN yielded the best classification performance (AUC=0.91), effectively differentiating IHD patients from healthy controls based on pathway abundance. SHAP analysis identified 15 pivotal pathways, with F-type ATPase, the ComD/ComE two-component regulatory system, and V-type ATPase emerging as the top predictors. These results support existing microbiome-IHD links while highlighting bacterial energetics and quorum sensing as previously underappreciated pathways that warrant further investigation." +,,poster,Chipster: Easy access to R/Bioconductor tools for non-coding scientists,"Heli Juottonen,Jesse Harrison,Eero Saarro,Petri Klemelä,Taavi Hupponen,Eija Korpelainen",,"The widening scope of high-throughput sequencing has created a skills gap: non-computational biologists who need to analyse large data sets. Chipster (http://chipster.csc.fi) offers a versatile collection of bioinformatics tools via an easy-to-use graphical user interface, enabling biologists to access the latest R-based tools for analysing RNA-seq, single-cell RNA-seq, spatial transcriptomics and microbial community amplicon data. The target audience of Chipster is researchers with limited or no computational skills who want to analyse data using up-to-date tools themselves. Chipster is strongly aimed at teaching bioinformatics concepts, tools and workflows. We provide ready-to-use course materials including slides, exercises and data sets. Exercises and analysis examples are available as sessions on the Chipster server, supporting self-learning. Many courses include lecture videos, and the Chipster YouTube channel currently contains 169 videos and has 8260 subscribers. 
Courses on spatially resolved transcriptomics and single cell RNA-seq data analysis are available as open online courses at any time, and a microbial community data version is under preparation. The R code running behind the scenes in Chipster is visible to users, which enables a smooth transition to R once skills develop. To illustrate how Chipster works, we describe an amplicon sequence analysis workflow for microbial community data relying on the Bioconductor packages DADA2 and Phyloseq. The user starts with raw amplicon sequence data. Bacterial/archaeal 16S and eukaryotic 18S ribosomal RNA gene and fungal internal transcribed spacer amplicons are fully supported with provided databases (Silva and UNITE), but options to provide your own database enable the analysis of other types of amplicon sequence data as well. After quality control and pre-processing, the user processes the data into amplicon sequence variants with taxonomic assignments and creates a Phyloseq object. The Phyloseq object is then used to compare and visualize the structure of microbial communities using alpha diversity estimates, ordinations and multivariate statistics, including differential abundance analysis. Results, visualizations and the underlying source code can be easily exported. Sessions and workflows can be shared among users, enabling collaboration and support. Chipster is an open-source software and available freely as a server installation." +,,poster,Impact of PFAS on lipid metabolism in MASLD is mediated through steroids,"Pauli Tikka,Aidan McGlinchey,Ilia Evstafev,Alex Dickens,Hannele Yki-jarvinen,Tuulia Hyötyläinen,Matej Oresic",,"The liver plays an essential role in the metabolism of nutrients, synthesis of glucose and lipids as well as hormone metabolism. However, the hepatic metabolic functions change especially during the metabolic dysfunction-associated steatotic liver disease (MASLD) (1). 
In MASLD, the regular homeostasis of liver metabolism can be disrupted by the interactions between the environmental, genetic, and epigenetic factors. We previously found that the exposure to per- and polyfluoroalkyl substances (PFAS) is associated differentially to lipid and bile acid metabolism in females as compared to males (2), suggesting that steroid hormones are playing a role in mediating the impact of PFAS exposure (2). Here we investigated if the steroid hormones are associated with the clinical classifications of the liver disease. In addition, we investigated whether the steroid hormones were mediating the impact of PFAS exposure on lipids and bile acids, and whether this impact is different between males and females. For these purposes, we used mediation analysis (3) and a bivariate analysis. We found that PFAS exposure had sex-specific impact on lipids and bile acids, both directly or indirectly via steroids. The PFAS had a clearly stronger impact on steroid hormones in females than in males. The association of androgens to MASLD was clearly different between the sexes. The general mediation pattern of the indirect impact of PFAS to the bile acids or lipids via steroids was largely similar, with females having higher average causal mediated effects as compared to males. This suggests that the impact of PFAS exposure on hepatic metabolome is mediated via steroids in MASLD (1). (1) Hutchison AL, et al. Endocrine aspects of metabolic dysfunction-associated steatotic liver disease (MASLD): Beyond insulin resistance. J Hepatol. 2023 Dec;79(6):1524-1541. (2) Sen P, et al. Exposure to environmental contaminants is associated with altered hepatic lipid metabolism in non-alcoholic fatty liver disease. J Hepatol. 2022 Feb;76(2):283-293. (3) Tingley D, et al. mediation: R Package for Causal Mediation Analysis. J Stat Softw. 2014 Sep;59(5):1-38." 
+,,poster,GO-a-GO: an R package for Gene Ontology enrichment analysis of gene pairs,"Aleksander Jankowski,Daryna Yakymenko,Teresa Szczepińska",,"The identification of overrepresented Gene Ontology (GO) terms in a set of genes is a standard approach to obtain functional associations, e.g. to characterize the set of differentially expressed genes between treatment and control samples. Here, we present the R package GO-a-GO that annotates Gene Ontology terms that are enriched in a given set of gene pairs. This provides the opportunity to annotate which functions are associated with gene pairs defined by a selected group of chromatin contacts, such as differential contacts between cell types or chromatin loops. GO-a-GO calculates enrichment from a permutation test for overrepresentation of gene pairs that are associated with a shared GO term. Such gene pairs are counted for the original set of gene pairs and compared against randomized sets in which the structure of the pairs is preserved, but the gene identities (including the associated GO terms) are permuted. Therefore, we do not focus on the fact that the term is enriched in a group of genes, but that genes with this term get paired more often than expected. We used GO-a-GO to identify GO terms enriched in gene pairs associated with chromatin loops identified in human cell line GM12878. The analysis revealed several enriched functional terms associated with both genes in contact pairs. Many of these terms were not enriched when annotating the gene set without the information on gene pairs. In summary, we developed a promising tool to study the function of chromatin contacts, which can be used independently on the type of experimental method used and the nature of contacts (e.g. loops, differential contacts or spatial gene clusters)." 
+,,poster,CyStyles: Consistent and Exchangeable Visual Styles for Network Visualization in R,Florian Auer,,"Network visualization is central to interpreting biological relationships, yet achieving consistent styling across different visualization tools remains a challenge in R. The Cytoscape Exchange format version 2 (CX2) is the data model that connects these platforms, enabling seamless network exchange between Cytoscape desktop, the Cytoscape web application, and NDEx. Cytoscape is a widely used open-source platform for network analysis and visualization, while NDEx (the Network Data Exchange) serves as a public repository for sharing biological networks. A distinctive feature of CX2 is that visual styles are stored within the network itself as an isolated, reusable, and exchangeable aspect, decoupled from the underlying network data. Currently, support for CX2 visual styles in R is fragmented. The RCX package provides support for the first version of the CX format but lacks the revised visual property model introduced in CX2, and its focus lies on network data (nodes, edges, and their attributes) rather than on validating or constructing proper visual styles. RCy3 uses similar visual properties for network rendering but is oriented toward applying visualization options by remotely controlling a running Cytoscape instance, not toward creating portable, reusable style definitions. Beyond these, several R packages offer graph visualization with different strengths: igraph focuses on graph analysis with basic plotting capabilities, RCy3 enables the most comprehensive visualizations but depends on external Cytoscape software, visNetwork provides interactive HTML-based visualizations suitable for websites and Shiny applications, and RCX includes the NDEx-native web rendering with direct HTML export. Each of these packages defines its own styling interface, meaning that switching between packages requires rewriting visualization code entirely. 
We present CyStyles, a Bioconductor package that implements visual property definitions following the CX2 specification and bridges the gap between network data management and visualization in R. CyStyles addresses these gaps by providing a unified interface for defining visual styles. The package adopts the visual property mapping paradigm established by Cytoscape. Beyond default visual properties that apply uniformly to all nodes, edges, or the entire network, CyStyles allows users to map network attributes to visual representations through three mapping types: passthrough mappings, which directly use attribute values as visual properties, such as displaying a gene name as a node label; discrete mappings, which assign distinct visual properties to categorical values, for example representing different interaction types as different edge styles; and continuous mappings, which interpolate visual properties along user-defined control points, enabling color gradients or size scaling based on numerical scores. Additionally, CyStyles supports by-pass styling, allowing individual nodes or edges to override defaults and mappings for fine-grained visual adjustments. CyStyles currently supports igraph, RCy3, visNetwork, and RCX for NDEx-native web rendering and HTML export by applying consistent visualization across the packages for a shared core of visual properties. CyStyles also provides a collection of predefined styles, including defaults that emulate each backend's native appearance as well as domain-specific styles for biological networks, metabolic pathways, protein-protein interaction networks, and BioPAX formats. By unifying visual style creation, validation, and multi-package rendering in a single package, CyStyles enables researchers to define a visualization once and deploy it across Cytoscape, NDEx, interactive web applications, and static plots, making network visual styles as portable and reproducible as the network data itself." 
+,,poster,An Integrated Strategy for Accurate TME Cell Annotation in HGSOC Beyond Current Single-Cell Methods,"Laura Masatti,Nicolò Gnoato,Stefania Pirrotta,Matteo Marchetti,Robert Fruscio,Lorenzo Ceppi,Laura Mannarino,Luca Beltrame,Maurizio D'Incalci,Sergio Marchini,Roberto Tozzi,Chiara Romualdi,Enrica Calura",,"High-grade serous ovarian cancer (HGSOC) remains the leading cause of mortality among gynecologic malignancies, largely due to its invasiveness and the extensive heterogeneity of the tumor microenvironment (TME). Within the TME, cancer-associated fibroblasts (CAFs) are increasingly implicated in tumor progression and therapeutic resistance. However, dissecting these complex cellular interactions requires precise cell-type identification. Current single-cell annotation strategies face significant limitations: manual annotation is labor-intensive and subjective, while automated reference-based methods often fail due to incomplete reference atlases that miss critical HGSOC-specific cell states. To address these challenges, we benchmarked existing annotation methods and performed an in-depth refinement of public gene signatures for TME components. We utilized publicly available datasets alongside a unique HGSOC cohort comprising 17 single-cell samples from multiple patients and anatomical sites to validate our signatures. We propose a standardized analysis pipeline designed to overcome common annotation pitfalls. First, the pipeline distinguishes malignant from normal cells using copy-number variation (CNV) profiles to ensure accurate classification. Second, cell-type assignment is performed using our manually curated and validated list of TME-specific gene signatures. Finally, unsupervised clustering is applied to validate known cell types and identify differentially expressed genes in previously uncharacterized subtypes. 
The primary objectives of this study are to (1) define robust marker sets for known and novel subtypes, with a specific focus on CAFs; (2) build a comprehensive, curated reference of all cell types expected in HGSOC samples; and (3) deliver a biologically and statistically robust strategy to standardize future single-cell studies in ovarian cancer." +,,poster,Appendiceal microbiome in recurrent appendicitis,"Tanja Orpana,Sanja Vanhatalo,Vilhelm Suksi,Teemu Kallonen,Eveliina Munukka,Jussi Haijanen,Suvi Sippola,Saija Hurme,Antti Hakanen,Leo M Lahti,Paulina Salminen",,"Epidemiologically, clinically, and microbiologically acute appendicitis presents as two different forms of severity: milder uncomplicated and more severe complicated acute appendicitis. A growing body of evidence suggests that uncomplicated acute appendicitis can be successfully treated with antibiotics or perhaps even by symptomatic therapy only. Approximately one-third of patients have recurrent appendicitis after non-operative treatment at long-term follow-up, but most of the recurrences happen within the first 1,5 years. Several, largely oral-associated microbes, have been associated with acute appendicitis. However, the appendiceal microbiome specifically in recurrent appendicitis has not been investigated. The aim of this study was to compare the appendiceal microbiome in recurrent appendicitis to that of primary uncomplicated appendicitis using shotgun metagenomics. In addition, the appendiceal microbiome of patients who did not respond to non-operative treatment within 30 days was compared between complicated and uncomplicated cases. Patients were enrolled in two clinical randomized controlled trials (APPAC II: i.v.+p.o. antibiotics vs. p.o. antibiotics, APPAC III: i.v. + p.o. antibiotics vs. placebo) and one prospective cohort study (MAPPAC). Microbiological samples from appendiceal lumen were collected immediately after the appendectomy. Rectal swabs were collected on index admission. 
DNA was extracted with GXT Stool extraction kit. Shotgun sequencing reads were trimmed, filtered, depleted of host, and used to estimate species abundance with Kraken2 followed by Bracken. Taxonomic tables were analyzed in Bioconductor, mainly using the mia package. Recurrent appendicitis group (n=36) was not significantly lower in observed richness (β = -27.81, p = 0.058) or Shannon diversity (β = -0.24, p = 0.155) compared with uncomplicated appendicitis group (n=48). Principal component analysis with Aitchison distance indicated only a slight separation between these groups. Taxa which contributed prominently to PCA loadings and were also differentially abundant include Escherichia coli and Haemophilus parainfluenzae, which were respectively less and more abundant in recurrent appendicitis. Species more abundant in uncomplicated group were generally driven by prevalence differences. Comparing the non-responders, there were no differences between uncomplicated (n=13) and complicated appendicitis (n=13). The appendiceal microbiome in recurrent appendicitis and primary uncomplicated appendicitis is similar. Functional and translational implications of putative differences such as E. coli depletion in recurrent appendicitis warrant further investigation. To understand host response and specifically inflammatory profiles in acute appendicitis, we want to integrate metagenomics and metabolomics data. Method development for integration will be accommodated in Bioconductor to work seamlessly with mia and notame packages and promote extension, replication and reproducibility." +,,poster,k-mer manifold approximation and projection for visualizing DNA sequences,Lu Cheng,,"Identifying and illustrating patterns in DNA sequences are crucial tasks in various biological data analyses. In this task, patterns are often represented by sets of k-mers, the fundamental building blocks of DNA sequences. 
To visually unveil these patterns, one could project each k-mer onto a point in two-dimensional (2D) space. However, this projection poses challenges owing to the high-dimensional nature of k-mers and their unique mathematical properties. Here, we establish a mathematical system to address the peculiarities of the k-mer manifold. Leveraging this k-mer manifold theory, we develop a statistical method named KMAP for detecting k-mer patterns and visualizing them in 2D space. We applied KMAP to three distinct data sets to showcase its utility. KMAP achieves a comparable performance to the classical method MEME, with ∼90% similarity in motif discovery from HT-SELEX data. In the analysis of H3K27ac ChIP-seq data from Ewing sarcoma (EWS), we find that BACH1, OTX2, and KNCH2 might affect EWS prognosis by binding to promoter and enhancer regions across the genome. We also observe potential colocalization of BACH1, OTX2, and the motif CCCAGGCTGGAGTGC in ∼70 bp windows in the enhancer regions. Furthermore, we find that FLI1 binds to the enhancer regions after ETV6 degradation, indicating competitive binding between ETV6 and FLI1. Moreover, KMAP identifies four prevalent patterns in gene editing data of the AAVS1 locus, aligning with findings reported in the literature. These applications underscore that KMAP can be a valuable tool across various biological contexts." +,,poster,Quantitative analysis of genetic interactions in human cells from genome-wide CRISPR-Cas9 screens,"Maximilian Billmann,Chad Myers",,"Genetic interaction (GI) networks in model organisms have revealed how combinations of genome variants can impact phenotypes. To advance efforts toward a reference human GI network, we developed the quantitative Genetic Interaction (qGI) score, a method for precise GI measurement from genome-wide CRISPR-Cas9 screens in different query mutants constructed in a single human cell line. 
We found surprising prevalent systematic variation unrelated to GIs in CRISPR screen data, including both genomically linked effects and functionally coherent covariation. Leveraging ~40 control screens in wild-type cells and half a billion differential fitness effect measurements, we developed a pipeline for CRISPR screen data processing and normalization to correct these artifacts and measure accurate, quantitative GIs. We also comprehensively characterized GI reproducibility by characterizing 4 – 5 biological replicates for ~125,000 unique gene pairs. The qGI framework enables systematic identification of human GIs and provides broadly applicable strategies for analyzing context-specific CRISPR screen data." +,,poster,Cognitive–Metabolic Decoupling as a Quantitative Trait of Resilience in Alzheimer’s Disease Progression,Dany Mukesha,,"Alzheimer’s disease (AD) progression is typically described using categorical clinical stages, despite substantial inter-individual variability in the relationship between biological pathology and cognitive decline. What remains insufficiently understood is why some individuals maintain stable cognition despite high biological burden (resilience), whereas others deteriorate rapidly with comparatively modest pathology. Here, we introduce Cognitive–Metabolic Decoupling (CMD) as a continuous, quantitative trait capturing this cognitive–biological asynchrony and evaluate its prognostic relevance for dementia progression. Data from 798 participants in the Alzheimer’s Disease Neuroimaging Initiative (192 cognitively normal, 454 mild cognitive impairment [MCI], 152 AD dementia) were analyzed. A Metabolic Dysregulation Score (MDS) was derived using elastic net regression (α = 0.5) integrating FDG-PET and core cerebrospinal fluid biomarkers (Aβ42, total tau, p-tau181) to predict a standardized cognitive composite (MMSE and inverted ADAS-Cog13). 
CMD was defined as the residual cognitive variance after adjustment for MDS, hippocampal volume normalized by intracranial volume, age, and sex. Cox proportional hazards models assessed associations between CMD and time to dementia conversion, with added prognostic value evaluated using likelihood ratio tests, concordance indices, and time-dependent ROC analysis. MDS explained 47% of cognitive variability and performed comparably to joint multimarker models with fewer parameters. CMD captured 52% of residual cognitive variance and was mathematically independent of hippocampal atrophy (r≈0). Higher CMD was strongly associated with a reduced risk of dementia conversion (hazard ratio per SD=0.48; 95% CI 0.41–0.57; p<10⁻¹⁸), achieving a 2-year AUC of 0.77. Adding CMD to models containing FDG-PET, amyloid, tau, age, and sex significantly improved prognostic performance (Δχ²=80.5; concordance index 0.73→0.82). Among individuals with MCI, CMD stratified participants into clinically distinct trajectories, identifying a subgroup with delayed progression despite comparable biological burden. No significant association was observed in cognitively normal individuals, consistent with limited event rates. CMD provides a robust, interpretable measure of cognitive resilience and vulnerability that substantially augments classical AD biomarkers. This framework reframes disease progression as a continuous divergence between biological and cognitive aging, enabling improved risk stratification and trial enrichment. Future work will validate CMD in independent cohorts and extend the approach to blood-based biomarkers and longitudinal CMD trajectories to support precision medicine in AD. Although demonstrated in Alzheimer’s disease, the CMD computation framework is disease-agnostic and applicable to any setting involving partial decoupling between molecular measurements (multimodal biological data) and phenotypic outcomes." 
+,,poster,RingNet: An Interactive Platform for Multi-Modal Data Visualization in Networks,"Liang Zhang,Xin Lai",,"Cancer research increasingly integrates heterogeneous patient- and cell-level evidence, including multi-omics profiles, single-cell readouts, and clinical phenotypes, to generate coherent hypotheses that remain interpretable in a biological network context. However, many network visualization workflows either require substantial programming expertise or lack compact, modality-aligned representations for rapid cross-modal comparison across large cohorts. In addition, translational studies often construct patient–patient networks in which nodes represent individuals and weighted edges quantify relationship strength. Although weighted patient graphs are widely used for cohort stratification and hypothesis generation, practical tools for flexibly visualizing these networks at scale together with rich multi-modal patient attributes in a unified and interpretable view remain limited. We present RingNet, a web-based interactive platform for multi-modal network visualization that is tailored to cohort-scale biomedical studies and readily applicable to cancer datasets. RingNet adopts a three-tier architecture. Users upload CSV files through a browser. The files include the network of interest and corresponding numerical and categorical data for network nodes and edges. A user session then securely transfers the inputs to an R backend. The backend performs network construction, coordinate optimization, and multi-modal data harmonization, and it exports visualization-ready JSON. A D3.js-based frontend supports real-time exploration, including dynamic layout adjustment, node and edge filtering, customizable color and size encodings, and export of high-quality SVG and PDF figures. 
A core innovation is RingNet’s concentric ring glyph representation: each node can display multiple aligned rings (modalities), and each ring is subdivided into sectors representing shared cohort entities. This design makes modality concordance/discordance interpretable within and across nodes while preserving network topology. We demonstrate RingNet on (i) TCGA cancer multi-omics mapped to gene regulatory networks and (ii) cell–cell communication networks derived from single-cell data, illustrating how aligned multi-modal node encodings reveal clinically relevant patterns and network-level structure. By supporting both molecular interaction graphs and user-defined, weighted patient–patient networks via the same general template, RingNet lowers the barrier to interactive, reproducible, and publication-ready visualization for cancer discovery and translational hypothesis generation. RingNet is available at https://fip-128-214-252-149.kaj.poutavm.fi/cmt_figures." +,,poster,MiREA: A Network-Based Tool for Edge-Based MicroRNA-oriented Enrichment Analysis,"Zhesi Zhang,Xin Lai",,"MicroRNAs (miRNAs) are post-transcriptional regulators of gene expression and cellular phenotypes, yet systematic interpretation of their functions at the pathway level remains challenging. Existing miRNA enrichment methods are node-based and rely on node-level data while ignoring the role of miRNA-gene interactions (MGIs) in gene regulatory networks, thereby introducing substantial bias and hindering biological interpretation. Here, we develop miREA, an MGI edge-centric network-based tool for miRNA functional enrichment analysis. By integrating miRNA and gene expression profiles with curated gene regulatory networks, miREA can infer MGI regulatory strength through edge-level scoring and supports five edge-based algorithms, namely over-representation, scoring-based, topology-aware, and network-propagation approaches. 
Comprehensive benchmarking using cancer genomics data shows that compared to node-based methods, the edge-based ones achieve higher sensitivity and better biological interpretability while maintaining reasonable false positive rates. Moreover, a case study in bladder cancer demonstrates that miREA can elucidate the molecular mechanisms of identified functional MGIs from genes to pathways to tumor phenotypes. Overall, miREA is a versatile tool for miRNA enrichment analysis that can elucidate their role in human diseases. The tool’s open-source R code facilitates its publication with Bioconductor and supports dissemination and adoption of the tool within the biomedical community." +,,poster,End-to-end multi-omics workflows for non-model algae: from fragmented data sources to functional interpretation,Ana Belén Romero-Losada,,"Omics analyses are already complex in model organisms. In non-model systems, a major limitation is the lack of unified, reusable annotation resources compatible with established computational ecosystems. Genome assemblies, gene models and functional annotations are typically distributed across heterogeneous databases, complicating reproducible downstream analyses. To address this limitation in microalgae, we constructed species-specific annotation infrastructures fully compatible with Bioconductor. Genome and functional annotations (GO, KEGG Orthology, PFAM terms, EC and KOG) were systematically integrated from multiple repositories and consolidated into SQLite-based OrgDb packages. Gene structure annotations were converted into TxDb objects, enabling genomic-range-based analyses. These packages provide a reusable backbone for omics studies and can be used independently of any web interface. 
However, to extend its accessibility beyond the bioinformatics community, we implemented these annotation backbones into a set of tools that enable non-expert microalgae research groups to analyse their own sequencing data within a reproducible framework: ALGAEFUN with MARACAS. MARACAS implements an automated workflow (supporting sequential or SLURM-based parallel execution) that processes RNA-seq and ChIP-seq experiments from raw FASTQ files to standardized outputs. For RNA-seq, it performs quality control (FastQC), read alignment (HISAT2 or kallisto), transcript assembly and quantification (StringTie), and differential expression analysis using limma/ballgown. For ChIP-seq, it carries out alignment (bowtie2), peak calling (MACS2), and generation of BED and BigWig files for downstream analyses. These outputs can be directly integrated with ALGAEFUN, which provides an interface designed to lower the technical barrier for non-expert users to obtain a biological interpretation of their results. Functional enrichment (GO and KEGG) and genomic locus annotation are performed using clusterProfiler, pathview, ChIPseeker, and ChIPpeakAnno, ensuring interoperability with the broader Bioconductor ecosystem for non-model algae. Together, MARACAS and ALGAEFUN cover the complete process from raw sequencing data to functional interpretation. This end-to-end design is enabled by the prior unification of functional annotation systems into coherent, species-specific infrastructures, addressing the limitation for omics analyses in non-model microalgae." +,,poster,"Integrated biological and chemical wastewater surveillance reveals highly local dynamics of viral, bacteria and chemicals compositions",Ville N. Pimenoff,,"Wastewater-based surveillance (WBS) is a well-established tool for community surveillance of pathogens and chemicals. 
During and after the COVID-19 pandemic, between July 2021 and May 2024, we systematically collected wastewater samples at 18 wastewater treatment plants covering 15 cities in Sweden. Wastewater samples were concentrated and biotic profiling analyzed using shotgun metagenomics and abiotic profiling using LC-MS/MS. From wastewater samples, we identified both known and unknown microbes, including human-related bacteria and viruses, as well as sequences of unknown viral origin and a range of known and unknown environmental chemicals. We report a systematic detection of viral human pathogens and phages infecting bacteria typically found in human gut microbiota. Regional and seasonal differences for both microbial and chemical exposome were identified. We could also identify regional outbreaks of human-infecting bacteria and viruses such as Adenovirus F41 or changes in chemical abundance related to human activities and associated with the time of social relaxation of COVID-19 pandemic restrictions. In summary, systematic and integrated viral, bacterial, and chemical wastewater exposomics is a powerful tool for mapping of cumulative changes in human environmental exposures including unknown biological and chemical components and local outbreak features." +,,poster,Deep Learning Pipeline for Segmentation and Temporal Prediction of Cell Migration in Wound Healing Assays,"Alfredo De Cillis,Valeria Garzarelli,Alessia Foscarini,Maria Serena Chiriacò",,"The prediction of the evolution of complex dynamic systems from image sequences is a significant challenge in the context of machine learning and data-driven systems, especially in the presence of non-linear dynamics, noise and limited datasets. In this work, a deep learning-based pipeline is presented for automatic segmentation and temporal prediction applied to timelapse images related to wound healing assays. 
The structure of the work relies on an ordered series of steps beginning with wound segmentation, followed by numerical prediction of the migration rate at predefined timepoints, and culminating with the prediction of morphological dynamics of the cellular front over time. To achieve high segmentation accuracy, a convolutional neural network based on the ResU-Net architecture has been realized to segment the wound area and to study the proliferation and migration dynamics of cells. The numerical prediction of the cellular migration rate has been carried out using a regression model combined with a similarity procedure based on the k-Nearest Neighbour algorithm. This approach allows the system to provide accurate forecasts by referencing similar historical instances. Furthermore, an autoregressive approach applied to the ResU-Net based convolutional neural network was used to capture the spatial evolution of cell morphology, offering a better understanding of temporal dynamics within the wound healing process. The results demonstrate high segmentation performance and good predictive power in terms of both overall trends and morphological consistency of predictions over different time horizons. The proposed framework can be generalised to other image-based prediction problems, making it a versatile and robust tool for the study and modelling of complex dynamic phenomena in biomedical imaging." +,,poster,Evaluating the reliability of drug response prediction across transcriptomic domains,"Juho Mikkonen,Teemu J. Rintala,Vittorio Fortino",,"Deep learning (DL) has become a central methodology in modern bioinformatics and health sciences, enabling applications ranging from gene expression–based diagnostics and patient stratification to cell type classification, histopathology, and multi-omics data integration. 
Although these models have the potential to substantially improve clinical decision-making and patient outcomes, they often fail to generalize when applied to new cohorts. Without rigorous external validation using datasets entirely independent of the training process, DL-based models risk overestimating performance by capturing dataset-specific artifacts rather than underlying biological signals. Biological and technical heterogeneity further exacerbate this issue by introducing substantial distribution shifts between datasets. Such shifts arise from differences in sequencing platforms, library preparation protocols, and patient demographics, ultimately leading to unreliable predictions when models are deployed outside their original training domain. This challenge is particularly pronounced in transfer learning and domain adaptation settings involving gene expression data, where the goal is often to transfer drug sensitivity predictions from in vitro cancer cell line models to patient tumors. We recently developed a tool, MODAE, which addresses this task by learning transferable representations using data from CCLE, GDSC, CTRP, and TCGA. However, validating the reliability of such models on truly external datasets remains a major challenge. Here, we propose a diagnostic framework for assessing domain adaptation in deep learning models. This diagnostic was evaluated using MODAE and is designed to support the development of robust validation protocols for deep learning–based de-confounding autoencoders applied to external datasets. Ultimately, this approach provides a principled means to assess whether predictions generated on external cohorts can be considered trustworthy." 
+,,poster,NetREm: Network Regression Embeddings reveal cell-type transcription factor coordination for gene regulation,"Saniya Khullar,Xiang Huang,Raghu Ramesh,John Svaren,Daifeng Wang",,"Abstract Motivation Transcription factor (TF) coordination plays a key role in gene regulation via direct and/or indirect protein–protein interactions (PPIs) and co-binding to regulatory elements on DNA. Single-cell technologies facilitate gene expression measurement for individual cells and cell-type identification, yet the connection between TF-TF coordination and target gene (TG) regulation of various cell types remains unclear. Results To address this, we introduce our innovative computational approach, Network Regression Embeddings (NetREm), to reveal cell-type TF-TF coordination activities for TG regulation. NetREm leverages network-constrained regularization, using prior knowledge of PPIs among TFs, to analyze single-cell gene expression data, uncovering cell-type coordinating TFs and identifying revolutionary TF-TG candidate regulatory network links. Thus, NetREm outputs a cell-type signed GRN (positive coefficients for activators, negative coefficients for repressors) and unprecedented and novel TF-TF coordination networks (positive scores for cooperative links, negative scores for antagonistic links): TG-specific (for each of the TGs in the cell-type), and an overall network for the cell-type. NetREm’s performance is validated using simulation studies and benchmarked across several datasets in humans, mice, yeast. We show NetREm has comparative performance across various evaluation metrics to state-of-the-art cell-type GRN inference tools and also learns TF-TF coordination links, which other tools do not predict. Further, we showcase NetREm’s ability to prioritize valid novel human TF-TF coordination links in 9 peripheral blood mononuclear and 42 immune cell sub-types. We apply NetREm to examine cell-type networks in central and peripheral nerve systems (e.g. 
neuronal, glial, Schwann cells) and in Alzheimer’s disease versus Controls. Top predictions are validated with experimental data (e.g. ChIP-seq, Cut&Run, knockout experiments) from rat, mouse, and human models. Additional functional genomics (e.g. expression quantitative trait loci, genome-wide association studies) data helps link genetic variants (e.g. Single Nucleotide Polymorphisms (SNPs)) to our TF-TG regulatory and TF-TF coordination networks. Availability and implementation Please note that NetREm is currently available as an open-source Python package tool at: https://github.com/SaniyaKhullar/NetREm. We provide many tutorials for utilizing our tool to glean insights. We are building an R suite for NetREm as well to cater to the larger community and their programming preferences. NetREm learns latent representations (embeddings) of the input data. We note that NetREm can be applied in several biological scenarios where the predictors (X) and response variable (y) are continuous and the predictors form networks of interactions to impact the response variable. That is, NetREm can help researchers understand not only what predictors influence the response variable, but also how those predictors interact with each other to impact the response variable." diff --git a/pages/schedule.qmd b/pages/schedule.qmd index 869d29e..4fa474d 100644 --- a/pages/schedule.qmd +++ b/pages/schedule.qmd @@ -1,10 +1,5 @@ # Schedule -::: {.callout-note icon=false} -## Note! -The program below is tentative and may be updated. -::: - ```{r} #| results: asis @@ -12,5 +7,8 @@ The program below is tentative and may be updated. 
#| warning: false #| message: false source("../R/schedule.R") -render_program_schedule() +render_detailed_program( + program_csv = "../data/program.csv", + sessions_csv = "../data/sessions.csv" +) ``` diff --git a/styles.css b/styles.css index 9b36f32..67168be 100644 --- a/styles.css +++ b/styles.css @@ -96,3 +96,42 @@ img.speaker { line-height: 1.25; } +details.schedule-details { + margin: 0; +} + +details.schedule-details summary { + list-style: none; + cursor: pointer; + font-weight: 400; + font-size: 1em; + line-height: 1.45; + + display: block; /* IMPORTANT */ + position: relative; /* for absolute arrow */ + padding-right: 1.8rem; /* space for arrow */ +} + +details.schedule-details summary::-webkit-details-marker { + display: none; +} + +details.schedule-details summary::marker { + content: ""; +} + +/* RIGHT-ALIGNED CHEVRON */ +details.schedule-details summary::after { + content: "›"; + position: absolute; + right: 0; + top: 50%; + transform: translateY(-50%); + font-size: 1.3em; + color: #8a8a8a; +} + +/* OPEN STATE */ +details.schedule-details[open] summary::after { + content: "⌄"; +}