Vitek-Lab · tonywu1999 · Apr 2, 2026 · Apr 2, 2026 · Apr 2, 2026 · Apr 7, 2026
diff --git a/R/dataProcess.R b/R/dataProcess.R
@@ -155,7 +155,7 @@ dataProcess = function(
                             "== Start the summarization per subplot...")
     getOption("MSstatsMsg")("INFO",
                             " == Start the summarization per subplot...")
-    
+
     processed = getProcessed(input)
     input = MSstatsPrepareForSummarization(input, summaryMethod, MBimpute, censoredInt,
                                            remove_uninformative_feature_outlier)
@@ -210,7 +210,12 @@ MSstatsSummarizeWithMultipleCores = function(input, method, impute, censored_sym
                               remove50missing, equal_variance, numberOfCores = 1,
                               aft_iterations = 90) {
     if (numberOfCores > 1) {
-        protein_indices = split(seq_len(nrow(input)), list(input$PROTEIN))
+        is_labeled_reference = "ref" %in% colnames(input) && any(input$ref, na.rm = TRUE)
+        if (is_labeled_reference) {
+            protein_indices = split(seq_len(nrow(input)), list(input$PROTEIN))
+        } else {
+            protein_indices = split(seq_len(nrow(input)), list(input$PROTEIN, input$LABEL))
+        }
         num_proteins = length(protein_indices)
         function_environment = environment()
         cl = parallel::makeCluster(numberOfCores)
@@ -253,9 +258,9 @@ MSstatsSummarizeWithMultipleCores = function(input, method, impute, censored_sym
         parallel::stopCluster(cl)
         return(summarized_results)
     } else {
-        return(MSstatsSummarizeWithSingleCore(input, method, impute, 
-                                              censored_symbol, 
-                                              remove50missing, 
+        return(MSstatsSummarizeWithSingleCore(input, method, impute,
+                                              censored_symbol,
+                                              remove50missing,
                                               equal_variance,
                                               aft_iterations))
     }
@@ -294,7 +299,12 @@ MSstatsSummarizeWithSingleCore = function(input, method, impute, censored_symbol
                             remove50missing, equal_variance, aft_iterations = 90) {
 
 
-    protein_indices = split(seq_len(nrow(input)), list(input$PROTEIN))
+    is_labeled_reference = "ref" %in% colnames(input) && any(input$ref, na.rm = TRUE)
+    if (is_labeled_reference) {
+        protein_indices = split(seq_len(nrow(input)), list(input$PROTEIN))
+    } else {
+        protein_indices = split(seq_len(nrow(input)), list(input$PROTEIN, input$LABEL))
+    }
     num_proteins = length(protein_indices)
     summarized_results = vector("list", num_proteins)
     if (method == "TMP") {
@@ -366,7 +376,7 @@ MSstatsSummarizeSingleLinear = function(single_protein,
 
     cols = intersect(
       colnames(single_protein),
-      c("newABUNDANCE", "cen", "RUN", "FEATURE", "ref")
+      c("newABUNDANCE", "cen", "RUN", "FEATURE", "ref_covariate")
     )
 
     single_protein = single_protein[
@@ -383,33 +393,25 @@ MSstatsSummarizeSingleLinear = function(single_protein,
 
     if (impute & any(single_protein[["censored"]])) {
         survival_fit = .fitSurvival(
-          single_protein[LABEL == "L", cols, with = FALSE],
+          single_protein[, cols, with = FALSE],
           aft_iterations
         )
         sigma2 = survival_fit$scale^2
-        
+
         single_protein[, c("predicted", "imputation_var") := {
             pred = predict(survival_fit, newdata = .SD, se.fit = TRUE)
             list(pred$fit, pred$se.fit^2 + sigma2)
         }]
-
-        single_protein[, predicted := ifelse(
-          censored & (LABEL == "L"),
-          predicted,
-          NA
-        )]
-        single_protein[, newABUNDANCE := ifelse(
-          censored & LABEL == "L",
-          predicted,
-          newABUNDANCE
-        )]
-
-        survival = single_protein[, c(cols, "predicted"), with = FALSE]
+
+        single_protein[, predicted := ifelse(censored, predicted, NA)]
+        single_protein[, newABUNDANCE := ifelse(censored, predicted, newABUNDANCE)]
+
+        survival = single_protein[, intersect(c(cols, "LABEL", "predicted"), colnames(single_protein)), with = FALSE]
     } else {
-        survival = single_protein[, cols, with = FALSE]
+        survival = single_protein[, intersect(c(cols, "LABEL"), colnames(single_protein)), with = FALSE]
         survival[, predicted := NA]
     }
-    
+
     if (all(!is.na(single_protein$ANOMALYSCORES))) {
         single_protein[, weights :=
             anomaly_weights_z_vec(ANOMALYSCORES),
@@ -432,14 +434,12 @@ MSstatsSummarizeSingleLinear = function(single_protein,
     is_single_feature = .checkSingleFeature(single_protein)
 
     if (is_single_feature) {
-        result = single_protein[LABEL == "L",
-                                .(LogIntensities = mean(newABUNDANCE)),
-                                by = RUN]
+        result = single_protein[, .(LogIntensities = mean(newABUNDANCE)), by = RUN]
         result[, Protein := unique(single_protein$PROTEIN)]
+        result[, LABEL := unique(single_protein$LABEL)]
         result[, Variance := NA_real_]
-        setcolorder(result, c("Protein", "RUN", "LogIntensities",
-                              "Variance"))
-
+        setcolorder(result, c("Protein", "RUN", "LogIntensities", "Variance"))
+
         return(list(result, survival))
     } else {
         counts = xtabs(
@@ -465,13 +465,13 @@ MSstatsSummarizeSingleLinear = function(single_protein,
             cf = summary(fit)$coefficients[, 1]
             cov_mat = vcov(fit)
 
-            result = unique(single_protein[, .(Protein = PROTEIN,
-                                               RUN = RUN)])
+            result = unique(single_protein[, .(Protein = PROTEIN, RUN = RUN)])
             extracted_values = get_linear_summary(single_protein, cf,
                                                   counts, label, cov_mat)
             result = cbind(result, extracted_values)
+            result[, LABEL := unique(single_protein$LABEL)]
         }
-        
+
         return(list(result, survival))
     }
 }
@@ -513,7 +513,7 @@ MSstatsSummarizeSingleTMP = function(single_protein, impute, censored_symbol,
     newABUNDANCE = n_obs = n_obs_run = RUN = FEATURE = LABEL = NULL
     predicted = censored = NULL
     cols = intersect(colnames(single_protein), c("newABUNDANCE", "cen", "RUN",
-                                                 "FEATURE", "ref"))
+                                                 "FEATURE", "ref_covariate"))
     single_protein = single_protein[(n_obs > 1 & !is.na(n_obs)) &
                                         (n_obs_run > 0 & !is.na(n_obs_run))]
     if (nrow(single_protein) == 0) {
@@ -522,44 +522,46 @@ MSstatsSummarizeSingleTMP = function(single_protein, impute, censored_symbol,
     single_protein[, RUN := factor(RUN)]
     single_protein[, FEATURE := factor(FEATURE)]
     if (impute & any(single_protein[["censored"]])) {
-        
+
         # Flag to track convergence warning
         converged = TRUE
-        
+
         # Try to fit survival model and catch convergence warnings
         survival_fit = withCallingHandlers({
-            .fitSurvival(single_protein[LABEL == "L", cols, with = FALSE], 
-                         aft_iterations)
+            .fitSurvival(single_protein[, cols, with = FALSE], aft_iterations)
         }, warning = function(w) {
             if (grepl("converge", conditionMessage(w), ignore.case = TRUE)) {
                 message("Convergence warning caught: ", conditionMessage(w))
                 converged <<- FALSE
             }
         })
-        
+
         if (converged) {
             single_protein[, predicted := predict(survival_fit, newdata = .SD)]
         } else {
             single_protein[, predicted := NA_real_]
         }
-
-        single_protein[, predicted := ifelse(censored & (LABEL == "L"), predicted, NA)]
-        single_protein[, newABUNDANCE := ifelse(censored & LABEL == "L",
-                                                predicted, newABUNDANCE)]
-        survival = single_protein[, c(cols, "predicted"), with = FALSE]
+
+        single_protein[, predicted := ifelse(censored, predicted, NA)]
+        single_protein[, newABUNDANCE := ifelse(censored, predicted, newABUNDANCE)]
+        survival = single_protein[, intersect(c(cols, "LABEL", "predicted"), colnames(single_protein)), with = FALSE]
     } else {
-        survival = single_protein[, cols, with = FALSE]
+        survival = single_protein[, intersect(c(cols, "LABEL"), colnames(single_protein)), with = FALSE]
         survival[, predicted := NA]
     }
-    
+
     single_protein = .isSummarizable(single_protein, remove50missing)
     if (is.null(single_protein)) {
         return(list(NULL, NULL))
     } else {
         single_protein = single_protein[!is.na(newABUNDANCE), ]
-        is_labeled = nlevels(single_protein$LABEL) > 1
-        result = .runTukey(single_protein, is_labeled, censored_symbol, 
+        is_labeled_reference = "ref" %in% colnames(single_protein) &&
+            any(single_protein$ref, na.rm = TRUE)
+        result = .runTukey(single_protein, is_labeled_reference, censored_symbol,
                            remove50missing)
+        if (!is.null(result) && !is.element("LABEL", colnames(result))) {
+            result[, LABEL := "L"]
+        }
     }
     list(result, survival)
 }
diff --git a/R/dataProcessPlots.R b/R/dataProcessPlots.R
@@ -261,15 +261,15 @@ dataProcessPlots = function(
                             labels = seq(1, length(unique(RUN))))]
   summarized[, RUN := as.numeric(RUN)]
 
-  ## Meena :due to GROUP=0 for labeled.. extra care required.
+  has_heavy_prof = any(processed$LABEL == "H")
+  heavy_has_real_group_prof = has_heavy_prof && !all(processed[LABEL == "H", GROUP] %in% c("0", NA))
   tempGroupName = unique(processed[, c("GROUP", "RUN")])
-  if (length(unique(processed$LABEL)) == 2) {
+  if (length(unique(processed$LABEL)) == 2 && !heavy_has_real_group_prof) {
     tempGroupName = tempGroupName[GROUP != '0']
-  } 
-  tempGroupName = tempGroupName[order(RUN), ] ## Meena : should we order by GROUP or RUN? I guess by RUn, because x-axis is by RUN
+  }
+  tempGroupName = tempGroupName[order(RUN), ]
   level.group = as.character(unique(tempGroupName$GROUP))
-  tempGroupName$GROUP = factor(tempGroupName$GROUP,
-                                levels = level.group) ## Meena : factor GROUP again, due to 1, 10, 2, ... if you have better way, please change
+  tempGroupName$GROUP = factor(tempGroupName$GROUP, levels = level.group)
 
   groupAxis = as.numeric(xtabs(~GROUP, tempGroupName))
   cumGroupAxis = cumsum(groupAxis)
@@ -292,7 +292,11 @@ dataProcessPlots = function(
 
 
   if (length(unique(processed$LABEL)) == 2) {
-    processed[, LABEL := factor(LABEL, labels = c("Reference", "Endogenous"))]
+    if (heavy_has_real_group_prof) {
+      processed[, LABEL := factor(LABEL, labels = c("Heavy", "Light"))]
+    } else {
+      processed[, LABEL := factor(LABEL, labels = c("Reference", "Endogenous"))]
+    }
   } else {
     if (unique(processed$LABEL) == "L") {
       processed[, LABEL := factor(LABEL, labels = c("Endogenous"))]
@@ -442,8 +446,14 @@ dataProcessPlots = function(
   processed[, RUN := factor(RUN, levels = unique(processed$RUN),
                             labels = seq(1, data.table::uniqueN(processed$RUN)))]
 
+  has_heavy = any(processed$LABEL == "H")
+  heavy_has_real_group = has_heavy && !all(processed[LABEL == "H", GROUP] %in% c("0", NA))
   if (length(unique(processed$LABEL)) == 2) {
-    processed[, LABEL := factor(LABEL, labels = c("Reference", "Endogenous"))]
+    if (heavy_has_real_group) {
+      processed[, LABEL := factor(LABEL, labels = c("Heavy", "Light"))]
+    } else {
+      processed[, LABEL := factor(LABEL, labels = c("Reference", "Endogenous"))]
+    }
     label.color = c("darkseagreen1", "lightblue")
   } else {
     if (unique(processed$LABEL) == "L") {
@@ -454,14 +464,13 @@ dataProcessPlots = function(
       label.color = c("darkseagreen1")
     }
   }
-  
+
   processed = processed[order(LABEL, GROUP, SUBJECT)]
-
-  ## Meena :due to GROUP=0 for labeled.. extra care required.
+
   tempGroupName = unique(processed[, list(GROUP, RUN)])
-  if (length(unique(processed$LABEL)) == 2) {
+  if (length(unique(processed$LABEL)) == 2 && !heavy_has_real_group) {
     tempGroupName = tempGroupName[GROUP != '0']
-  } 
+  }
   tempGroupName = tempGroupName[order(RUN), ] ## Meena : should we order by GROUP or RUN? I guess by RUn, because x-axis is by RUN
   level.group = as.character(unique(tempGroupName$GROUP))
   tempGroupName$GROUP = factor(tempGroupName$GROUP,

diff --git a/R/utils_censored.R b/R/utils_censored.R
@@ -31,49 +31,47 @@ MSstatsHandleMissing = function(input, summary_method, impute,
 
     if (impute & !is.null(missing_symbol)) {
         input$censored = FALSE
+        use_for_analysis = if ("ref" %in% colnames(input)) !input$ref else rep(TRUE, nrow(input))
         ## if intensity = 1, but abundance > cutoff after normalization, it also should be censored.
         if (!is.null(censored_cutoff)) {
-            quantiles = input[!is.na(INTENSITY) & INTENSITY > 1 & LABEL == "L", 
-                              quantile(ABUNDANCE, 
-                                       prob = c(0.01, 0.25, 0.5, 0.75, 
-                                                censored_cutoff), 
+            quantiles = input[use_for_analysis & !is.na(INTENSITY) & INTENSITY > 1,
+                              quantile(ABUNDANCE,
+                                       prob = c(0.01, 0.25, 0.5, 0.75,
+                                                censored_cutoff),
                                        na.rm = TRUE)]
             iqr = quantiles[4] - quantiles[2]
             multiplier = (quantiles[5] - quantiles[4]) / iqr
-            cutoff_lower = (quantiles[2] - multiplier * iqr) 
-            input$censored = !is.na(input$INTENSITY) & 
-                input$LABEL == "L" &
+            cutoff_lower = (quantiles[2] - multiplier * iqr)
+            input$censored = use_for_analysis & !is.na(input$INTENSITY) &
                 input$ABUNDANCE < cutoff_lower
             if (cutoff_lower <= 0 & !is.null(missing_symbol) & missing_symbol == "0") {
-                zero_one_filter = !is.na(input$ABUNDANCE) & input$ABUNDANCE <= 0
+                zero_one_filter = use_for_analysis & !is.na(input$ABUNDANCE) & input$ABUNDANCE <= 0
                 input$censored = ifelse(zero_one_filter, TRUE, input$censored)
             }
             if (!is.null(missing_symbol) & missing_symbol == "NA") {
-                input$censored = ifelse(is.na(input$INTENSITY), TRUE, 
+                input$censored = ifelse(use_for_analysis & is.na(input$INTENSITY), TRUE,
                                         input$censored)
             }
-            
-            msg = paste('** Log2 intensities under cutoff =', 
-                        format(cutoff_lower, digits = 5), 
+
+            msg = paste('** Log2 intensities under cutoff =',
+                        format(cutoff_lower, digits = 5),
                         ' were considered as censored missing values.')
             msg_2 = paste("** Log2 intensities =", missing_symbol, "were considered as censored missing values.")
-            
+
             getOption("MSstatsMsg")("INFO", msg)
             getOption("MSstatsMsg")("INFO", msg_2)
-            
+
             getOption("MSstatsLog")("INFO", msg)
             getOption("MSstatsLog")("INFO", msg_2)
-            
+
         } else {
             if (missing_symbol == '0') {
-                input$censored = input$LABEL == "L" & 
-                    !is.na(input$INTENSITY) &
+                input$censored = use_for_analysis & !is.na(input$INTENSITY) &
                     (input$INTENSITY == 1 | input$ABUNDANCE <= 0)
             } else if (missing_symbol == 'NA') {
-                input$censored = input$LABEL == "L" & is.na(input$ABUNDANCE)
+                input$censored = use_for_analysis & is.na(input$ABUNDANCE)
             }
         }
-        input[, censored := ifelse(LABEL == "H", FALSE, censored)]
     } else {
         input$censored = FALSE
     }
@@ -131,13 +129,13 @@ MSstatsHandleMissing = function(input, summary_method, impute,
     if (impute) {
         if (!is.null(censored_symbol)) {
             if (censored_symbol == "0") {
-                nonmissing_filter = input$LABEL == "L" & !is.na(input$newABUNDANCE) & input$newABUNDANCE != 0
+                nonmissing_filter = !is.na(input$newABUNDANCE) & input$newABUNDANCE != 0
             } else if (censored_symbol == "NA") {
-                nonmissing_filter = input$LABEL == "L" & !is.na(input$newABUNDANCE)
-            }  
-        } 
+                nonmissing_filter = !is.na(input$newABUNDANCE)
+            }
+        }
     } else {
-        nonmissing_filter = input$LABEL == "L" & !is.na(input$newABUNDANCE) & input$newABUNDANCE != 0
+        nonmissing_filter = !is.na(input$newABUNDANCE) & input$newABUNDANCE != 0
     }
     nonmissing_filter
 }
diff --git a/R/utils_checks.R b/R/utils_checks.R
@@ -231,12 +231,17 @@ MSstatsPrepareForDataProcess = function(input, log_base, fix_missing) {
     }
     input$PEPTIDE = paste(input$PEPTIDESEQUENCE, input$PRECURSORCHARGE, sep = "_")
     input$TRANSITION = paste(input$FRAGMENTION, input$PRODUCTCHARGE, sep = "_")
-    input$ISOTOPELABELTYPE = factor(input$ISOTOPELABELTYPE)
-    if (data.table::uniqueN(input$ISOTOPELABELTYPE) == 2) {
-        levels(input$ISOTOPELABELTYPE) = c("H", "L")
-    } else {
-        levels(input$ISOTOPELABELTYPE) = "L"
-    }
+    label_map <- c(
+        "H"     = "H",
+        "L"     = "L",
+        "heavy" = "H",
+        "light" = "L",
+        "NA"    = "NA"
+    )
+    input$ISOTOPELABELTYPE <- factor(
+        label_map[as.character(input$ISOTOPELABELTYPE)],
+        levels = c("H", "L", "NA")
+    )
     input
 }
 
@@ -287,8 +292,8 @@ setMethod(".checkDataValidity", "MSstatsValidated", .prepareForDataProcess)
         skip_absent = TRUE)
 
     input[, FEATURE := paste(PEPTIDE, TRANSITION, sep = "_")]
-    input[, GROUP := ifelse(LABEL == "L", GROUP_ORIGINAL, "0")]
-    input[, SUBJECT := ifelse(LABEL == "L", SUBJECT_ORIGINAL, "0")]
+    input[, GROUP := GROUP_ORIGINAL]
+    input[, SUBJECT := SUBJECT_ORIGINAL]
 
     cols = c("PROTEIN", "PEPTIDE", "TRANSITION", "FEATURE", "LABEL", 
              "GROUP_ORIGINAL", "SUBJECT_ORIGINAL", "RUN", "GROUP",