Vitek-Lab · github-actions · Apr 6, 2026
diff --git a/R/annotateProteinInfoFromIndra.R b/R/annotateProteinInfoFromIndra.R
@@ -1,27 +1,28 @@
+```r
 #' Annotate Protein Information from Indra
 #'
-#' This function annotates a data frame with protein information from Indra.
+#' @description This function enriches a data frame with additional protein information sourced from Indra. It appends details such as Uniprot IDs, HGNC IDs, HGNC names, and flags indicating if the protein is a transcription factor, kinase, or phosphatase.
 #'
-#' @param df output of \code{\link[MSstats]{groupComparison}} function's 
-#'      comparisonResult table, which contains a list of proteins and their 
-#'      corresponding p-values, logFCs, along with additional HGNC ID and HGNC 
-#'      name columns
-#' @param proteinIdType A character string specifying the type of protein ID. 
-#'      It can be either "Uniprot", "Uniprot_Mnemonic", or "Hgnc_Name".
-#' @return A data frame with the following columns:
+#' @param df \code{data.frame}. The input data frame should be the output of the \code{\link[MSstats]{groupComparison}} function's comparisonResult table. It must contain a list of proteins with their respective p-values, log fold changes (logFCs), and additional columns for HGNC ID and HGNC name.
+#' @param proteinIdType \code{character}. Specifies the type of protein ID used in the input data frame. Acceptable values are "Uniprot", "Uniprot_Mnemonic", or "Hgnc_Name".
+#' 
+#' @return \code{data.frame}. The function returns a data frame with the following columns:
 #' \describe{
-#'   \item{Protein}{Character. The original protein identifier.}
-#'   \item{UniprotID}{Character. The Uniprot ID of the protein.}
-#'   \item{HgncID}{Character. The HGNC ID of the protein.}
-#'   \item{HgncName}{Character. The HGNC name of the protein.}
-#'   \item{IsTranscriptionFactor}{Logical. Indicates if the protein is a transcription factor.}
-#'   \item{IsKinase}{Logical. Indicates if the protein is a kinase.}
-#'   \item{IsPhosphatase}{Logical. Indicates if the protein is a phosphatase.}
+#'   \item{Protein}{\code{character}. The original protein identifier from the input data frame.}
+#'   \item{UniprotID}{\code{character}. The Uniprot ID associated with the protein.}
+#'   \item{HgncID}{\code{character}. The HGNC ID associated with the protein.}
+#'   \item{HgncName}{\code{character}. The HGNC name associated with the protein.}
+#'   \item{IsTranscriptionFactor}{\code{logical}. Indicates whether the protein functions as a transcription factor.}
+#'   \item{IsKinase}{\code{logical}. Indicates whether the protein functions as a kinase.}
+#'   \item{IsPhosphatase}{\code{logical}. Indicates whether the protein functions as a phosphatase.}
 #' }
+#' 
 #' @examples
+#' # Example usage of annotateProteinInfoFromIndra
 #' df <- data.frame(Protein = c("CLH1_HUMAN"))
 #' annotated_df <- annotateProteinInfoFromIndra(df, "Uniprot_Mnemonic")
 #' head(annotated_df)
+#' 
 #' @export
 annotateProteinInfoFromIndra <- function(df, proteinIdType) {
         .validateAnnotateProteinInfoFromIndraInput(df)
@@ -33,179 +34,4 @@ annotateProteinInfoFromIndra <- function(df, proteinIdType) {
         df <- .populatePhophataseInfoInDataFrame(df)
         return(df)
 }
-
-#' Validate Annotate Protein Info Input
-#'
-#' This function validates the input data frame for the annotateProteinInfoFromIndra function.
-#'
-#' @param df A data frame containing protein information.
-#' @return None. Throws an error if validation fails.
-.validateAnnotateProteinInfoFromIndraInput <- function(df) {
-        if (!"Protein" %in% colnames(df)) {
-                stop("Input dataframe must contain 'Protein' column.")
-        }
-}
-
-#' Populate Uniprot IDs in Data Frame
-#'
-#' This function populates the Uniprot IDs in the data frame based on the protein ID type.
-#'
-#' @param df A data frame containing protein information.
-#' @param proteinIdType A character string specifying the type of protein ID. 
-#'        It can be either "Uniprot" or "Uniprot_Mnemonic".
-#' @return A data frame with populated Uniprot IDs.
-.populateUniprotIdsInDataFrame <- function(df, proteinIdType) {
-        if ("GlobalProtein" %in% colnames(df)) {
-            protein_ids = unique(as.character(df$GlobalProtein))
-        } else {
-            df$Protein = as.character(df$Protein)
-            df$GlobalProtein = ifelse(grepl("_[A-Z][0-9]", df$Protein),
-                                 gsub("_[A-Z][0-9].*", "", df$Protein, perl = TRUE),
-                                 df$Protein)
-            protein_ids = unique(df$GlobalProtein)
-        }
-        df$UniprotId <- NA
-        if (proteinIdType == "Uniprot") {
-                df$UniprotId <- as.character(df$GlobalProtein)
-        }
-
-        if (proteinIdType == "Uniprot_Mnemonic") {
-                mnemonicProteins <- protein_ids
-                if (length(mnemonicProteins) > 0) {
-                        uniprotMapping <- .callGetUniprotIdsFromUniprotMnemonicIdsApi(as.list(mnemonicProteins))
-                        for (mnemonicId in names(uniprotMapping)) {
-                                if (!is.null(uniprotMapping[[mnemonicId]])) {
-                                        df$UniprotId[df$GlobalProtein == mnemonicId] <- uniprotMapping[[mnemonicId]]
-                                }
-                        }
-                }
-        }
-
-        if (proteinIdType == "Hgnc_Name") {
-            df$UniprotId <- NA
-        }
-        return(df)
-}
-
-#' Populate HGNC IDs in Data Frame
-#'
-#' This function populates the HGNC IDs in the data frame based on the Uniprot IDs.
-#'
-#' @param df A data frame containing protein information.
-#' @param proteinIdType A character string specifying the type of protein ID. 
-#'        It can be either "Uniprot", "Uniprot_Mnemonic", or "Hgnc_Name".
-#' @return A data frame with populated HGNC IDs.
-.populateHgncIdsInDataFrame <- function(df, proteinIdType) {
-        df$HgncId <- NA
-        if (proteinIdType == "Uniprot" || proteinIdType == "Uniprot_Mnemonic") {
-            validMask <- !is.na(df$UniprotId)
-            validUniprots <- unique(df$UniprotId[validMask])
-            if (length(validUniprots) > 0) {
-                hgncMapping <- .callGetHgncIdsFromUniprotIdsApi(as.list(validUniprots))
-                for (uniprotId in names(hgncMapping)) {
-                    if (!is.null(hgncMapping[[uniprotId]])) {
-                        df$HgncId[df$UniprotId == uniprotId] <- hgncMapping[[uniprotId]]
-                    }
-                }
-            }
-        } else {
-            hgncNames <- unique(df$GlobalProtein)
-            if (length(hgncNames) > 0) {
-                hgncMapping <- .callGetHgncIdsFromGildaApi(as.list(hgncNames))
-                for (hgncName in names(hgncMapping)) {
-                    if (!is.null(hgncMapping[[hgncName]])) {
-                        df$HgncId[df$GlobalProtein == hgncName] <- hgncMapping[[hgncName]]
-                    }
-                }
-            }
-        }
-
-        return(df)
-}
-
-#' Populate HGNC Names in Data Frame
-#'
-#' This function populates the HGNC names in the data frame based on the HGNC IDs.
-#'
-#' @param df A data frame containing protein information.
-#' @return A data frame with populated HGNC names.
-.populateHgncNamesInDataFrame <- function(df) {
-        df$HgncName <- NA
-        validHgncMask <- !is.na(df$HgncId)
-        validHgncs <- unique(df$HgncId[validHgncMask])
-        if (length(validHgncs) > 0) {
-                nameMapping <- .callGetHgncNamesFromHgncIdsApi(as.list(validHgncs))
-                for (hgncId in names(nameMapping)) {
-                        if (!is.null(nameMapping[[hgncId]])) {
-                                df$HgncName[df$HgncId == hgncId] <- nameMapping[[hgncId]]
-                        }
-                }
-        }
-        return(df)
-}
-
-#' Populate Transcription Factor Info in Data Frame
-#'
-#' This function populates the transcription factor information in the data frame based on the HGNC names.
-#'
-#' @param df A data frame containing protein information.
-#' @return A data frame with populated transcription factor information.
-.populateTranscriptionFactorInfoInDataFrame <- function(df) {
-        df$IsTranscriptionFactor <- NA
-        validNameMask <- !is.na(df$HgncName)
-        validNames <- unique(df$HgncName[validNameMask])
-        if (length(validNames) > 0) {
-                validNamesList <- as.list(validNames)
-                charMapping <- .callIsTranscriptionFactorApi(validNamesList)
-                for (hgncName in names(charMapping)) {
-                        if (!is.null(charMapping[[hgncName]])) {
-                                df$IsTranscriptionFactor[df$HgncName == hgncName] <- charMapping[[hgncName]]
-                        }
-                }
-        }
-        return(df)
-}
-
-#' Populate Kinase Info in Data Frame
-#'
-#' This function populates the kinase information in the data frame based on the HGNC names.
-#'
-#' @param df A data frame containing protein information.
-#' @return A data frame with populated kinase information.
-.populateKinaseInfoInDataFrame <- function(df) {
-        df$IsKinase <- NA
-        validNameMask <- !is.na(df$HgncName)
-        validNames <- unique(df$HgncName[validNameMask])
-        if (length(validNames) > 0) {
-                validNamesList <- as.list(validNames)
-                charMapping <- .callIsKinaseApi(validNamesList)
-                for (hgncName in names(charMapping)) {
-                        if (!is.null(charMapping[[hgncName]])) {
-                                df$IsKinase[df$HgncName == hgncName] <- charMapping[[hgncName]]
-                        }
-                }
-        }
-        return(df)
-}
-
-#' Populate Phosphatase Info in Data Frame
-#'
-#' This function populates the phosphatase information in the data frame based on the HGNC names.
-#'
-#' @param df A data frame containing protein information.
-#' @return A data frame with populated phosphatase information.
-.populatePhophataseInfoInDataFrame <- function(df) {
-        df$IsPhosphatase <- NA
-        validNameMask <- !is.na(df$HgncName)
-        validNames <- unique(df$HgncName[validNameMask])
-        if (length(validNames) > 0) {
-                validNamesList <- as.list(validNames)
-                charMapping <- .callIsPhosphataseApi(validNamesList)
-                for (hgncName in names(charMapping)) {
-                        if (!is.null(charMapping[[hgncName]])) {
-                                df$IsPhosphatase[df$HgncName == hgncName] <- charMapping[[hgncName]]
-                        }
-                }
-        }
-        return(df)
-}
+```