diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000..7a6334b Binary files /dev/null and b/.DS_Store differ diff --git a/.github/.gitignore b/.github/.gitignore new file mode 100644 index 0000000..2d19fc7 --- /dev/null +++ b/.github/.gitignore @@ -0,0 +1 @@ +*.html diff --git a/.github/workflows/check-bioc.yml b/.github/workflows/check-bioc.yml new file mode 100644 index 0000000..e17131a --- /dev/null +++ b/.github/workflows/check-bioc.yml @@ -0,0 +1,280 @@ +## Read more about GitHub actions the features of this GitHub Actions workflow +## at https://lcolladotor.github.io/biocthis/articles/biocthis.html#use_bioc_github_action +## +## For more details, check the biocthis developer notes vignette at +## https://lcolladotor.github.io/biocthis/articles/biocthis_dev_notes.html +## +## You can add this workflow to other packages using: +## > biocthis::use_bioc_github_action() +## +## Using GitHub Actions exposes you to many details about how R packages are +## compiled and installed in several operating system.s +### If you need help, please follow the steps listed at +## https://github.com/r-lib/actions#where-to-find-help +## +## If you found an issue specific to biocthis's GHA workflow, please report it +## with the information that will make it easier for others to help you. +## Thank you! + +## Acronyms: +## * GHA: GitHub Action +## * OS: operating system + +on: + push: + pull_request: + +name: R-CMD-check-bioc + +## These environment variables control whether to run GHA code later on that is +## specific to testthat, covr, and pkgdown. +## +## If you need to clear the cache of packages, update the number inside +## cache-version as discussed at https://github.com/r-lib/actions/issues/86. +## Note that you can always run a GHA test without the cache by using the word +## "/nocache" in the commit message. +env: + has_testthat: 'true' + run_covr: 'false' + run_pkgdown: 'true' + has_RUnit: 'false' + cache-version: 'cache-v1' + run_docker: 'false' + +jobs: + build-check: + runs-on: ${{ matrix.config.os }} + name: ${{ matrix.config.os }} (${{ matrix.config.r }}) + container: ${{ matrix.config.cont }} + ## Environment variables unique to this job. + + strategy: + fail-fast: false + matrix: + config: + - { os: ubuntu-latest, r: '4.0', bioc: '3.12', cont: "bioconductor/bioconductor_docker:RELEASE_3_12", rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest" } + + env: + R_REMOTES_NO_ERRORS_FROM_WARNINGS: true + RSPM: ${{ matrix.config.rspm }} + NOT_CRAN: true + TZ: UTC + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + + steps: + + ## Set the R library to the directory matching the + ## R packages cache step further below when running on Docker (Linux). + - name: Set R Library home on Linux + if: runner.os == 'Linux' + run: | + mkdir /__w/_temp/Library + echo ".libPaths('/__w/_temp/Library')" > ~/.Rprofile + + ## Most of these steps are the same as the ones in + ## https://github.com/r-lib/actions/blob/master/examples/check-standard.yaml + ## If they update their steps, we will also need to update ours. + - name: Checkout Repository + uses: actions/checkout@v2 + + ## R is already included in the Bioconductor docker images + - name: Setup R from r-lib + if: runner.os != 'Linux' + uses: r-lib/actions/setup-r@master + with: + r-version: ${{ matrix.config.r }} + + ## pandoc is already included in the Bioconductor docker images + - name: Setup pandoc from r-lib + if: runner.os != 'Linux' + uses: r-lib/actions/setup-pandoc@master + + - name: Query dependencies + run: | + install.packages('remotes') + saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) + shell: Rscript {0} + + - name: Cache R packages + if: "!contains(github.event.head_commit.message, '/nocache') && runner.os != 'Linux'" + uses: actions/cache@v2 + with: + path: ${{ env.R_LIBS_USER }} + key: ${{ env.cache-version }}-${{ runner.os }}-biocversion-RELEASE_3_12-r-4.0-${{ hashFiles('.github/depends.Rds') }} + restore-keys: ${{ env.cache-version }}-${{ runner.os }}-biocversion-RELEASE_3_12-r-4.0- + + - name: Cache R packages on Linux + if: "!contains(github.event.head_commit.message, '/nocache') && runner.os == 'Linux' " + uses: actions/cache@v2 + with: + path: /home/runner/work/_temp/Library + key: ${{ env.cache-version }}-${{ runner.os }}-biocversion-RELEASE_3_12-r-4.0-${{ hashFiles('.github/depends.Rds') }} + restore-keys: ${{ env.cache-version }}-${{ runner.os }}-biocversion-RELEASE_3_12-r-4.0- + + - name: Install Linux system dependencies + if: runner.os == 'Linux' + run: | + sysreqs=$(Rscript -e 'cat("apt-get update -y && apt-get install -y", paste(gsub("apt-get install -y ", "", remotes::system_requirements("ubuntu", "20.04")), collapse = " "))') + echo $sysreqs + sudo -s eval "$sysreqs" + + - name: Install macOS system dependencies + if: matrix.config.os == 'macOS-latest' + run: | + ## Enable installing XML from source if needed + brew install libxml2 + echo "XML_CONFIG=/usr/local/opt/libxml2/bin/xml2-config" >> $GITHUB_ENV + + ## Required to install magick as noted at + ## https://github.com/r-lib/usethis/commit/f1f1e0d10c1ebc75fd4c18fa7e2de4551fd9978f#diff-9bfee71065492f63457918efcd912cf2 + brew install imagemagick@6 + + ## For textshaping, required by ragg, and required by pkgdown + brew install harfbuzz fribidi + + ## For installing usethis's dependency gert + brew install libgit2 + + - name: Install Windows system dependencies + if: runner.os == 'Windows' + run: | + ## Edit below if you have any Windows system dependencies + shell: Rscript {0} + + - name: Install BiocManager + run: | + message(paste('****', Sys.time(), 'installing BiocManager ****')) + remotes::install_cran("BiocManager") + shell: Rscript {0} + + - name: Set BiocVersion + run: | + BiocManager::install(version = "${{ matrix.config.bioc }}", ask = FALSE) + shell: Rscript {0} + + - name: Install dependencies pass 1 + run: | + ## Try installing the package dependencies in steps. First the local + ## dependencies, then any remaining dependencies to avoid the + ## issues described at + ## https://stat.ethz.ch/pipermail/bioc-devel/2020-April/016675.html + ## https://github.com/r-lib/remotes/issues/296 + ## Ideally, all dependencies should get installed in the first pass. + + ## Pass #1 at installing dependencies + message(paste('****', Sys.time(), 'pass number 1 at installing dependencies: local dependencies ****')) + remotes::install_local(dependencies = TRUE, repos = BiocManager::repositories(), build_vignettes = TRUE, upgrade = TRUE) + continue-on-error: true + shell: Rscript {0} + + - name: Install dependencies pass 2 + run: | + ## Pass #2 at installing dependencies + message(paste('****', Sys.time(), 'pass number 2 at installing dependencies: any remaining dependencies ****')) + remotes::install_local(dependencies = TRUE, repos = BiocManager::repositories(), build_vignettes = TRUE, upgrade = TRUE) + + ## For running the checks + message(paste('****', Sys.time(), 'installing rcmdcheck and BiocCheck ****')) + remotes::install_cran("rcmdcheck") + BiocManager::install("BiocCheck") + shell: Rscript {0} + + - name: Install BiocGenerics + if: env.has_RUnit == 'true' + run: | + ## Install BiocGenerics + BiocManager::install("BiocGenerics") + shell: Rscript {0} + + - name: Install covr + if: github.ref == 'refs/heads/master' && env.run_covr == 'true' && runner.os == 'Linux' + run: | + remotes::install_cran("covr") + shell: Rscript {0} + + - name: Install pkgdown + if: github.ref == 'refs/heads/master' && env.run_pkgdown == 'true' && runner.os == 'Linux' + run: | + remotes::install_cran("pkgdown") + shell: Rscript {0} + + - name: Session info + run: | + options(width = 100) + pkgs <- installed.packages()[, "Package"] + sessioninfo::session_info(pkgs, include_base = TRUE) + shell: Rscript {0} + + - name: Run CMD check + env: + _R_CHECK_CRAN_INCOMING_: false + run: | + rcmdcheck::rcmdcheck( + args = c("--no-build-vignettes", "--no-manual", "--timings", + "--no-multiarch"), + build_args = c("--no-manual", "--no-resave-data"), + error_on = "warning", + check_dir = "check" + ) + shell: Rscript {0} + + ## Might need an to add this to the if: && runner.os == 'Linux' + - name: Reveal testthat details + if: env.has_testthat == 'true' + run: find . -name testthat.Rout -exec cat '{}' ';' + + - name: Run RUnit tests + if: env.has_RUnit == 'true' + run: | + BiocGenerics:::testPackage() + shell: Rscript {0} + + - name: Run BiocCheck + run: | + BiocCheck::BiocCheck( + dir('check', 'tar.gz$', full.names = TRUE), + `quit-with-status` = TRUE, + `no-check-R-ver` = TRUE, + `no-check-bioc-help` = TRUE + ) + shell: Rscript {0} + + - name: Test coverage + if: github.ref == 'refs/heads/master' && env.run_covr == 'true' && runner.os == 'Linux' + run: | + covr::codecov() + shell: Rscript {0} + + - name: Install package + if: github.ref == 'refs/heads/master' && env.run_pkgdown == 'true' && runner.os == 'Linux' + run: R CMD INSTALL . + + - name: Deploy package + if: github.ref == 'refs/heads/master' && env.run_pkgdown == 'true' && runner.os == 'Linux' + run: | + git config --local user.email "actions@github.com" + git config --local user.name "GitHub Actions" + Rscript -e "pkgdown::deploy_to_branch(new_process = FALSE)" + shell: bash {0} + ## Note that you need to run pkgdown::deploy_to_branch(new_process = FALSE) + ## at least one locally before this will work. This creates the gh-pages + ## branch (erasing anything you haven't version controlled!) and + ## makes the git history recognizable by pkgdown. + + - name: Upload check results + if: failure() + uses: actions/upload-artifact@master + with: + name: ${{ runner.os }}-biocversion-RELEASE_3_12-r-4.0-results + path: check + + - uses: docker/build-push-action@v1 + if: "!contains(github.event.head_commit.message, '/nodocker') && env.run_docker == 'true' && runner.os == 'Linux' " + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_PASSWORD }} + repository: felixernst/xtail + tag_with_ref: true + tag_with_sha: true + tags: latest diff --git a/.gitignore b/.gitignore index 4808d1c..5d1001c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ # History files +.RData .Rhistory .Rapp.history .Rbuildignore diff --git a/DESCRIPTION b/DESCRIPTION index 0c38b16..ed1b842 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,19 +1,61 @@ Package: xtail Type: Package -Title: Genome-wide assessment of differential translations with - ribosome profiling data -Version: 1.1.5 -Date: 2016-05 -Author: Zhengtao Xiao, Qin Zou, Yu Liu and Xuerui Yang -Maintainer: Zhengtao Xiao ; Rongyao Huang -Description: The purpose of this package is to discover the genes undergoing differential translation across two different experiment conditions - using both RNA-seq data and ribosom profiling data. +Title: Genome-wide assessment of differential translations with ribosome profiling data +Version: 1.2.0 +Authors@R: + c(person("Zhengtao", "Xiao", email="zhengtao.xiao[at]xjtu.edu.cn", + role = c("aut","cre")), + person("Qin", "Zou", role = "aut"), + person("Yu", "Liu", role = "aut"), + person("Xuerui", "Yang", role = "aut"), + person("Rongyao", "Huang", email = "THUhry12[at]163.com", role = "ctb")) +Description: + The xtail package is designed to identify genes exhibiting differential + translation between two experimental conditions by simultaneously analyzing + changes in RNA-seq and ribo-seq data. The estimation of changes in read + counts are performed using the DESeq2 package. +License: + GPL (>= 3) | file LICENSE +Encoding: UTF-8 +NeedsCompilation: yes +biocViews: + Software, DifferentialExpression +Depends: + R (>= 4.0), + DESeq2, + S4Vectors, + BiocGenerics +Imports: + methods, + stats, + utils, + parallel, + Rcpp (> 0.10.1), + matrixStats, + SummarizedExperiment, + locfit, + ggplot2 +Suggests: + knitr, + rmarkdown, + BiocStyle, + testthat +Collate: + 'xtail-package.R' + 'xtail-class.R' + 'RcppExports.R' + 'wrapper-functions.R' + 'estimate-functions.R' + 'xtail.R' + 'runXtail.R' + 'plot-utils.R' + 'plot-fc.R' + 'plot-r.R' + 'plot-volcano.R' +LinkingTo: + Rcpp, + RcppArmadillo VignetteBuilder: knitr -Depends: DESeq2 -Imports: parallel, Rcpp (> 0.10.1), genefilter -Suggests: knitr, BiocStyle -LinkingTo: Rcpp, RcppArmadillo -License: GPL (>= 3) URL: https://github.com/xryanglab/xtail -NeedsCompilation: yes -Packaged: 2016-05 +Roxygen: list(markdown = TRUE) +RoxygenNote: 7.3.2 diff --git a/NAMESPACE b/NAMESPACE index 270934c..8d08967 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,14 +1,58 @@ -import(parallel) -import(locfit) -import(DESeq2) -importFrom(genefilter,rowVars) -import(Rcpp) -export(xtail) +# Generated by roxygen2: do not edit by hand + +S3method(summary,xtail) +export(addXtail) export(plotFCs) export(plotRs) -export(volcanoPlot) -export(summary.xtailResults) +export(resultsNum) export(resultsTable) +export(runXtail) +export(summary) +export(volcanoPlot) export(write.xtail) +export(xtail) +exportClasses(xtail) +exportMethods(addXtail) +exportMethods(conditions) +exportMethods(plotFCs) +exportMethods(plotRs) +exportMethods(resultsNum) +exportMethods(resultsTable) +exportMethods(runXtail) +exportMethods(volcanoPlot) +import(DESeq2) +import(Rcpp) +import(ggplot2) +import(methods) +importFrom(BiocGenerics,conditions) +importFrom(S4Vectors,"mcols<-") +importFrom(S4Vectors,"metadata<-") +importFrom(S4Vectors,DataFrame) +importFrom(S4Vectors,SimpleList) +importFrom(S4Vectors,mcols) +importFrom(S4Vectors,metadata) +importFrom(SummarizedExperiment,"assay<-") +importFrom(SummarizedExperiment,"rowData<-") +importFrom(SummarizedExperiment,assay) +importFrom(SummarizedExperiment,colData) +importFrom(SummarizedExperiment,rowData) +importFrom(locfit,locfit) +importFrom(matrixStats,rowVars) +importFrom(parallel,clusterExport) +importFrom(parallel,detectCores) +importFrom(parallel,makeCluster) +importFrom(parallel,parLapply) +importFrom(parallel,stopCluster) +importFrom(stats,complete.cases) +importFrom(stats,dnbinom) +importFrom(stats,dnorm) +importFrom(stats,formula) +importFrom(stats,median) +importFrom(stats,model.matrix) +importFrom(stats,optim) +importFrom(stats,p.adjust) +importFrom(stats,predict) +importFrom(stats,relevel) +importFrom(stats,terms) +importFrom(utils,write.table) useDynLib(xtail) - diff --git a/NEWS b/NEWS index 13fcbbd..bb7f81a 100755 --- a/NEWS +++ b/NEWS @@ -1,22 +1,28 @@ -CHANGES IN VERSION 1.1.5 -************************ -=> add citations for xtail. -=> add summary() function -=> add resultsTable() function -=> add write.xtail() function -=> add plotFCs() function -=> add plotRs() function -=> add volcanoPlot() function - -CHANGES IN VERSION 1.1.4 -************************ -=> support for different number of replicates in each condition. - - -CHANGES IN VERSION 1.1.3 -************************ ------------------------- -=> add credible interval. -=> bug fixed, see issues #2. -=> official version - +CHANGES IN VERSION 1.2.0 +************************ +=> fixed issue for R 4.0 +=> added SummarizedExperiment wrapper functions +=> Fixed Error: logical subscript contains NAs + +CHANGES IN VERSION 1.1.5 +************************ +=> add citations for xtail. +=> add summary() function +=> add resultsTable() function +=> add write.xtail() function +=> add plotFCs() function +=> add plotRs() function +=> add volcanoPlot() function + +CHANGES IN VERSION 1.1.4 +************************ +=> support for different number of replicates in each condition. + + +CHANGES IN VERSION 1.1.3 +************************ +------------------------ +=> add credible interval. +=> bug fixed, see issues #2. +=> official version + diff --git a/R/AllGenerics.R b/R/AllGenerics.R deleted file mode 100755 index 5c4622c..0000000 --- a/R/AllGenerics.R +++ /dev/null @@ -1,24 +0,0 @@ - -#' @rdname dispersions2 -#' @export -setGeneric("dispersionMatrix", function(object,...) standardGeneric("dispersionMatrix")) - -#' @rdname dispersions2 -#' @export -setGeneric("dispersionMatrix<-", function(object,...,value) standardGeneric("dispersionMatrix<-")) - -#' @rdname resultsTable -#' @export -setGeneric("resultsTable", function(object,...) standardGeneric("resultsTable")) - -#' @rdname plotFCs -#' @export -setGeneric("plotFCs", function(object,...) standardGeneric("plotFCs")) - -#' @rdname vocanoPlot -#' @export -setGeneric("volcanoPlot", function(object,...) standardGeneric("volcanoPlot")) - -#' @rdname xtailResults -#' @export -setClass("xtailResults") diff --git a/R/CoreFuns.R b/R/CoreFuns.R deleted file mode 100755 index 0c9f572..0000000 --- a/R/CoreFuns.R +++ /dev/null @@ -1,566 +0,0 @@ -######################################################### -# -# These functions are copied and modify from DESeq2 package -# -######################################################### - -## Xtail relies on DESeq2 to estimate coefficient of GLM model - -estimateMLEForBeta <- function(object,modelMatrix=NULL,modelMatrixType="standard", - maxit=100, useOptim=TRUE, quiet=FALSE,useQR=TRUE) { - if (is.null(dispersionMatrix(object))) { - stop("testing requires dispersionMatrix estimates, first call estimateDispersions()") - } - stopifnot(length(maxit)==1) - - # in case the class of the mcols(mcols(object)) are not character - object <- sanitizeRowData(object) - - if (is.null(mcols(object)$allZero)) { - object <- getBaseMeansAndVariances(object) - } - - # only continue on the rows with non-zero row mean - objectNZ <- object[!mcols(object)$allZero,,drop=FALSE] - - if (is.null(modelMatrix)) { - modelAsFormula <- TRUE - termsOrder <- attr(terms.formula(design(object)),"order") - interactionPresent <- any(termsOrder > 1) - blindDesign <- design(object) == formula(~ 1) - - # store modelMatrixType so it can be accessed by estimateBetaPriorVar - attr(object, "modelMatrixType") <- modelMatrixType - hasIntercept <- attr(terms(design(object)),"intercept") == 1 - renameCols <- hasIntercept - } else { - message("using supplied model matrix") - modelAsFormula <- FALSE - attr(object, "modelMatrixType") <- "user-supplied" - renameCols <- FALSE - } - - - # fit the negative binomial GLM without a prior - # (in actuality a very wide prior with standard deviation 1e3 on log2 fold changes) - fit <- fitNbinomGLMs(objectNZ, maxit=maxit, useOptim=useOptim, useQR=useQR, - renameCols=renameCols, modelMatrix=modelMatrix) - H <- fit$hat_diagonals - modelMatrix <- fit$modelMatrix - modelMatrixNames <- fit$modelMatrixNames - # record the wide prior variance which was used in fitting - betaPriorVar <- rep(1e6, ncol(fit$modelMatrix)) - - - # store mu in case the user did not call estimateDispersionsGeneEst - dimnames(fit$mu) <- NULL - assays(objectNZ)[["mu"]] <- fit$mu - assays(object)[["mu"]] <- buildMatrixWithNARows(fit$mu, mcols(object)$allZero) - - # store the prior variance directly as an attribute - # of the DESeqDataSet object, so it can be pulled later by - # the results function (necessary for setting max Cook's distance) - attr(object,"betaPrior") <- FALSE - attr(object,"betaPriorVar") <- betaPriorVar - attr(object,"modelMatrix") <- modelMatrix - - # add betas to the object - modelMatrixNames <- colnames(modelMatrix) - betaMatrix <- fit$betaMatrix - colnames(betaMatrix) <- modelMatrixNames - betaSE <- fit$betaSE - colnames(betaSE) <- paste0("SE_",modelMatrixNames) - betaConv <- fit$betaConv - - if (any(!betaConv)) { - if (!quiet) message(paste(sum(!betaConv),"rows did not converge in beta, labelled in mcols(object)$betaConv. Use larger maxit argument")) - } - - - resultsList <- c(matrixToList(betaMatrix), - matrixToList(betaSE), - list(betaConv = betaConv, - betaIter = fit$betaIter, - deviance = -2 * fit$logLike)) - - Results <- buildDataFrameWithNARows(resultsList, mcols(object)$allZero) - - modelMatrixNamesSpaces <- gsub("_"," ",modelMatrixNames) - - lfcType <- "MLE" - coefInfo <- paste(paste0("log2 fold change (",lfcType,"):"),modelMatrixNamesSpaces) - seInfo <- paste("standard error:",modelMatrixNamesSpaces) - - mcols(Results) <- DataFrame(type = rep("results",ncol(Results)), - description = c(coefInfo, seInfo, - "convergence of betas", - "iterations for betas", - "deviance for the fitted model")) - - mcols(object) <- cbind(mcols(object),Results) - return(object) -} - -# Unexported, low-level function for fitting negative binomial GLMs -# -# Users typically call \code{\link{nbinomWaldTest}} or \code{\link{nbinomLRT}} -# which calls this function to perform fitting. These functions return -# a \code{\link{DESeqDataSet}} object with the appropriate columns -# added. This function returns results as a list. -# -# object a DESeqDataSet -# modelMatrix the design matrix -# modelFormula a formula specifying how to construct the design matrix -# alpha_hat the dispersion parameter estimates -# lambda the 'ridge' term added for the penalized GLM on the log2 scale -# renameCols whether to give columns variable_B_vs_A style names -# betaTol control parameter: stop when the following is satisfied: -# abs(dev - dev_old)/(abs(dev) + 0.1) < betaTol -# maxit control parameter: maximum number of iteration to allow for -# convergence -# useOptim whether to use optim on rows which have not converged: -# Fisher scoring is not ideal with multiple groups and sparse -# count distributions -# useQR whether to use the QR decomposition on the design matrix X -# forceOptim whether to use optim on all rows -# warnNonposVar whether to warn about non positive variances, -# for advanced users only running LRT without beta prior, -# this might be desirable to be ignored. -# -# return a list of results, with coefficients and standard -# errors on the log2 scale -fitNbinomGLMs <- function(object, modelMatrix=NULL, modelFormula, alpha_hat, lambda, - renameCols=TRUE, betaTol=1e-8, maxit=100, useOptim=TRUE, - useQR=TRUE, forceOptim=FALSE, warnNonposVar=TRUE) { - if (missing(modelFormula)) { - modelFormula <- design(object) - } - if (is.null(modelMatrix)) { - modelAsFormula <- TRUE - modelMatrix <- model.matrix(modelFormula, data=colData(object)) - } else { - modelAsFormula <- FALSE - } - - stopifnot(all(colSums(abs(modelMatrix)) > 0)) - - # rename columns, for use as columns in DataFrame - # and to emphasize the reference level comparison - modelMatrixNames <- colnames(modelMatrix) - modelMatrixNames[modelMatrixNames == "(Intercept)"] <- "Intercept" - modelMatrixNames <- make.names(modelMatrixNames) - - if (renameCols) { - convertNames <- renameModelMatrixColumns(colData(object), - modelFormula) - convertNames <- convertNames[convertNames$from %in% modelMatrixNames,,drop=FALSE] - modelMatrixNames[match(convertNames$from, modelMatrixNames)] <- convertNames$to - } - colnames(modelMatrix) <- modelMatrixNames - - normalizationFactors <- if (!is.null(normalizationFactors(object))) { - normalizationFactors(object) - } else { - matrix(rep(sizeFactors(object),each=nrow(object)), - ncol=ncol(object)) - } - - if (missing(alpha_hat)) { - alpha_hat <- dispersionMatrix(object) - } - - if (nrow(alpha_hat) != nrow(object)) { - stop("alpha_hat needs to be the same length as nrows(object)") - } - - # set a wide prior for all coefficients - if (missing(lambda)) { - lambda <- rep(1e-6, ncol(modelMatrix)) - } - - # bypass the beta fitting if the model formula is only intercept and - # the prior variance is large (1e6) - # i.e., LRT with reduced ~ 1 and no beta prior - justIntercept <- if (modelAsFormula) { - modelFormula == formula(~ 1) - } else { - ncol(modelMatrix) == 1 & all(modelMatrix == 1) - } - if (justIntercept & all(lambda <= 1e-6)) { - alpha <- alpha_hat - betaConv <- rep(TRUE, nrow(object)) - betaIter <- rep(1,nrow(object)) - betaMatrix <- matrix(log2(mcols(object)$baseMean),ncol=1) - mu <- normalizationFactors * as.numeric(2^betaMatrix) - logLike <- rowSums(dnbinom(counts(object), mu=mu, size=1/alpha, log=TRUE)) - deviance <- -2 * logLike - modelMatrix <- model.matrix(~ 1, colData(object)) - colnames(modelMatrix) <- modelMatrixNames <- "Intercept" - w <- (mu^-1 + alpha)^-1 - xtwx <- rowSums(w) - sigma <- xtwx^-1 - betaSE <- matrix(log2(exp(1)) * sqrt(sigma),ncol=1) - hat_diagonals <- w * xtwx^-1; - res <- list(logLike = logLike, betaConv = betaConv, betaMatrix = betaMatrix, - betaSE = betaSE, mu = mu, betaIter = betaIter, - deviance = deviance, - modelMatrix=modelMatrix, - nterms=1, hat_diagonals=hat_diagonals) - return(res) - } - - qrx <- qr(modelMatrix) - # if full rank, estimate initial betas for IRLS below - if (qrx$rank == ncol(modelMatrix)) { - Q <- qr.Q(qrx) - R <- qr.R(qrx) - y <- t(log(counts(object,normalized=TRUE) + .1)) - beta_mat <- t(solve(R, t(Q) %*% y)) - } else { - if ("Intercept" %in% modelMatrixNames) { - beta_mat <- matrix(0, ncol=ncol(modelMatrix), nrow=nrow(object)) - # use the natural log as fitBeta occurs in the natural log scale - logBaseMean <- log(rowMeans(counts(object,normalized=TRUE))) - beta_mat[,which(modelMatrixNames == "Intercept")] <- logBaseMean - } else { - beta_mat <- matrix(1, ncol=ncol(modelMatrix), nrow=nrow(object)) - } - } - - # here we convert from the log2 scale of the betas - # and the beta prior variance to the log scale - # used in fitBeta. - # so we divide by the square of the - # conversion factor, log(2) - lambdaLogScale <- lambda / log(2)^2 - - betaRes <- fitBetaWrapper(ySEXP = counts(object), xSEXP = modelMatrix, - nfSEXP = normalizationFactors, - alpha_hatSEXP = alpha_hat, - beta_matSEXP = beta_mat, - lambdaSEXP = lambdaLogScale, - tolSEXP = betaTol, maxitSEXP = maxit, - useQRSEXP=useQR) - mu <- normalizationFactors * t(exp(modelMatrix %*% t(betaRes$beta_mat))) - dispersionMatrix <- dispersionMatrix(object) - logLike <- nbinomLogLike(counts(object), mu, dispersionMatrix(object)) - - # test for stability - rowStable <- apply(betaRes$beta_mat,1,function(row) sum(is.na(row))) == 0 - - # test for positive variances - rowVarPositive <- apply(betaRes$beta_var_mat,1,function(row) sum(row <= 0)) == 0 - - # test for convergence, stability and positive variances - betaConv <- betaRes$iter < maxit - - # here we transform the betaMatrix and betaSE to a log2 scale - betaMatrix <- log2(exp(1))*betaRes$beta_mat - colnames(betaMatrix) <- modelMatrixNames - colnames(modelMatrix) <- modelMatrixNames - # warn below regarding these rows with negative variance - betaSE <- log2(exp(1))*sqrt(pmax(betaRes$beta_var_mat,0)) - colnames(betaSE) <- paste0("SE_",modelMatrixNames) - - # switch based on whether we should also use optim - # on rows which did not converge - rowsForOptim <- if (useOptim) { - which(!betaConv | !rowStable | !rowVarPositive) - } else { - which(!rowStable | !rowVarPositive) - } - - if (forceOptim) { - rowsForOptim <- seq_along(betaConv) - } - - if (length(rowsForOptim) > 0) { - # we use optim if didn't reach convergence with the IRLS code - resOptim <- fitNbinomGLMsOptim(object,modelMatrix,lambda, - rowsForOptim,rowStable, - normalizationFactors,alpha_hat, - betaMatrix,betaSE,betaConv, - beta_mat, - mu,logLike) - betaMatrix <- resOptim$betaMatrix - betaSE <- resOptim$betaSE - betaConv <- resOptim$betaConv - mu <- resOptim$mu - logLike <- resOptim$logLike - } - - stopifnot(!any(is.na(betaSE))) - nNonposVar <- sum(rowSums(betaSE == 0) > 0) - if (warnNonposVar & nNonposVar > 0) warning(nNonposVar,"rows had non-positive estimates of variance for coefficients") - - list(logLike = logLike, betaConv = betaConv, betaMatrix = betaMatrix, - betaSE = betaSE, mu = mu, betaIter = betaRes$iter, - deviance = betaRes$deviance, - modelMatrix=modelMatrix, - nterms=ncol(modelMatrix), hat_diagonals=betaRes$hat_diagonals) -} - - -# breaking out the optim backup code from fitNbinomGLMs -fitNbinomGLMsOptim <- function(object,modelMatrix,lambda, - rowsForOptim,rowStable, - normalizationFactors,alpha_hat, - betaMatrix,betaSE,betaConv, - beta_mat, - mu,logLike) { - scaleCols <- apply(modelMatrix,2,function(z) max(abs(z))) - stopifnot(all(scaleCols > 0)) - x <- sweep(modelMatrix,2,scaleCols,"/") - lambdaColScale <- lambda / scaleCols^2 - lambdaColScale <- ifelse(lambdaColScale == 0, 1e-6, lambdaColScale) - lambdaLogScale <- lambda / log(2)^2 - lambdaLogScaleColScale <- lambdaLogScale / scaleCols^2 - large <- 30 - for (row in rowsForOptim) { - betaRow <- if (rowStable[row] & all(abs(betaMatrix[row,]) < large)) { - betaMatrix[row,] * scaleCols - } else { - beta_mat[row,] * scaleCols - } - nf <- normalizationFactors[row,] - k <- counts(object)[row,] - alpha <- alpha_hat[row] - objectiveFn <- function(p) { - mu_row <- as.numeric(nf * 2^(x %*% p)) - logLike <- sum(dnbinom(k,mu=mu_row,size=1/alpha,log=TRUE)) - logPrior <- sum(dnorm(p,0,sqrt(1/lambdaColScale),log=TRUE)) - negLogPost <- -1 * (logLike + logPrior) - if (is.finite(negLogPost)) negLogPost else 10^300 - } - o <- optim(betaRow, objectiveFn, method="L-BFGS-B",lower=-large, upper=large) - ridge <- if (length(lambdaLogScale) > 1) { - diag(lambdaLogScaleColScale) - } else { - as.matrix(lambdaLogScaleColScale,ncol=1) - } - # if we converged, change betaConv to TRUE - if (o$convergence == 0) { - betaConv[row] <- TRUE - } - # with or without convergence, store the estimate from optim - betaMatrix[row,] <- o$par / scaleCols - # calculate the standard errors - mu_row <- as.numeric(nf * 2^(x %*% o$par)) - w <- diag((mu_row^-1 + alpha)^-1) - xtwx <- t(x) %*% w %*% x - xtwxRidgeInv <- solve(xtwx + ridge) - sigma <- xtwxRidgeInv %*% xtwx %*% xtwxRidgeInv - # warn below regarding these rows with negative variance - betaSE[row,] <- log2(exp(1)) * sqrt(pmax(diag(sigma),0)) / scaleCols - # store the new mu vector - mu[row,] <- mu_row - logLike[row] <- sum(dnbinom(k, mu=mu_row, size=1/alpha, log=TRUE)) - } - return(list(betaMatrix=betaMatrix,betaSE=betaSE, - betaConv=betaConv, - mu=mu,logLike=logLike)) -} - -# Fit beta coefficients for negative binomial GLM -# -# This function estimates the coefficients (betas) for negative binomial generalized linear models. -# -# ySEXP n by m matrix of counts -# xSEXP m by k design matrix -# nfSEXP n by m matrix of normalization factors -# alpha_hatSEXP n length vector of the disperion estimates -# contrastSEXP a k length vector for a possible contrast -# beta_matSEXP n by k matrix of the initial estimates for the betas -# lambdaSEXP k length vector of the ridge values -# tolSEXP tolerance for convergence in estimates -# maxitSEXP maximum number of iterations -# useQRSEXP whether to use QR decomposition -# -# Note: at this level the betas are on the natural log scale -fitBetaWrapper <- function (ySEXP, xSEXP, nfSEXP, alpha_hatSEXP, contrastSEXP, - beta_matSEXP, lambdaSEXP, tolSEXP, maxitSEXP, useQRSEXP) { - if ( missing(contrastSEXP) ) { - # contrast is not required, just give 1,0,0,... - contrastSEXP <- c(1,rep(0,ncol(xSEXP)-1)) - } - # test for any NAs in arguments - arg.names <- names(formals(fitBetaWrapper)) - na.test <- sapply(mget(arg.names), function(x) any(is.na(x))) - if (any(na.test)) stop(paste("in call to fitBeta, the following arguments contain NA:", - paste(arg.names[na.test],collapse=", "))) - - fitBeta2(ySEXP=ySEXP, xSEXP=xSEXP, nfSEXP=nfSEXP, alpha_hatSEXP=alpha_hatSEXP, - contrastSEXP=contrastSEXP, beta_matSEXP=beta_matSEXP, - lambdaSEXP=lambdaSEXP, tolSEXP=tolSEXP, maxitSEXP=maxitSEXP, - useQRSEXP=useQRSEXP) -} - -### - -# Get base means and variances -# -# An internally used function to calculate the row means and variances -# from the normalized counts, which requires that \code{\link{estimateSizeFactors}} -# has already been called. Adds these and a logical column if the row sums -# are zero to the mcols of the object. -# -# object a DESeqDataSet object -# -# return a DESeqDataSet object with columns baseMean -# and baseVar in the row metadata columns -#' @importFrom genefilter rowVars -getBaseMeansAndVariances <- function(object) { - meanVarZero <- DataFrame(baseMean = unname(rowMeans(counts(object,normalized=TRUE))), - baseVar = unname(rowVars(counts(object,normalized=TRUE))), - allZero = unname(rowSums(counts(object)) == 0)) - mcols(meanVarZero) <- DataFrame(type = rep("intermediate",ncol(meanVarZero)), - description = c("mean of normalized counts for all samples", - "variance of normalized counts for all samples", - "all counts for a gene are zero")) - if (all(c("baseMean","baseVar","allZero") %in% names(mcols(object)))) { - mcols(object)[c("baseMean","baseVar","allZero")] <- meanVarZero - } else { - mcols(object) <- cbind(mcols(object),meanVarZero) - } - return(object) -} - - -# convenience function for testing the log likelihood -# for a count matrix, mu matrix and vector disp -nbinomLogLike <- function(counts, mu, disp) { - rowSums(matrix(dnbinom(counts, mu=mu,size=1/disp, - log=TRUE),ncol=ncol(counts))) -} -sanitizeRowData <- function(object) { - if (is.null(mcols(mcols(object)))) { - mcols(mcols(object)) <- DataFrame(type=rep("input",ncol(mcols(object))), - description=character(ncol(mcols(object)))) - } - class(mcols(mcols(object))$type) <- "character" - class(mcols(mcols(object))$description) <- "character" - mcols(mcols(object))$type[ is.na(mcols(mcols(object))$type) ] <- "" - mcols(mcols(object))$description[ is.na(mcols(mcols(object))$description) ] <- "" - object -} -factorPresentThreeOrMoreLevels <- function(object) { - designFactors <- getDesignFactors(object) - threeOrMore <- sapply(designFactors,function(v) nlevels(colData(object)[[v]]) >= 3) - any(threeOrMore) -} -getDesignFactors <- function(object) { - design <- design(object) - designVars <- all.vars(design) - designVarsClass <- sapply(designVars, function(v) class(colData(object)[[v]])) - designVars[designVarsClass == "factor"] -} -# convenience function for building larger matrices -# by filling in NA rows -buildMatrixWithNARows <- function(m, NARows) { - mFull <- matrix(NA, ncol=ncol(m), nrow=length(NARows)) - mFull[!NARows,] <- m - mFull -} - - -# convenience function for breaking up matrices -# by column and preserving column names -matrixToList <- function(m) { - l <- split(m, col(m)) - names(l) <- colnames(m) - l -} - -# convenience function to make more descriptive names -# for factor variables -renameModelMatrixColumns <- function(data, design) { - data <- as.data.frame(data) - designVars <- all.vars(design) - designVarsClass <- sapply(designVars, function(v) class(data[[v]])) - factorVars <- designVars[designVarsClass == "factor"] - colNamesFrom <- make.names(do.call(c,lapply(factorVars, function(v) paste0(v,levels(data[[v]])[-1])))) - colNamesTo <- make.names(do.call(c,lapply(factorVars, function(v) paste0(v,"_",levels(data[[v]])[-1],"_vs_",levels(data[[v]])[1])))) - data.frame(from=colNamesFrom,to=colNamesTo,stringsAsFactors=FALSE) -} - - -# convenience function for building results tables -# out of a list and filling in NA rows -buildDataFrameWithNARows <- function(resultsList, NArows) { - lengths <- sapply(resultsList,length) - if (!all(lengths == lengths[1])) { - stop("lengths of vectors in resultsList must be equal") - } - if (sum(!NArows) != lengths[1]) { - stop("number of non-NA rows must be equal to lengths of vectors in resultsList") - } - if (sum(NArows) == 0) { - return(DataFrame(resultsList)) - } - dfFull <- DataFrame(lapply(resultsList, function(x) vector(mode(x), length(NArows)))) - dfFull[NArows,] <- NA - dfFull[!NArows,] <- DataFrame(resultsList) - dfFull -} - -estimateSizeFactorsForMatrix <- function( counts, locfunc = stats::median, geoMeans, controlGenes ) -{ - if (missing(geoMeans)) { - loggeomeans <- rowMeans(log(counts)) - } else { - if (length(geoMeans) != nrow(counts)) { - stop("geoMeans should be as long as the number of rows of counts") - } - loggeomeans <- log(geoMeans) - } - if (all(is.infinite(loggeomeans))) { - stop("every gene contains at least one zero, cannot compute log geometric means") - } - sf <- if (missing(controlGenes)) { - apply(counts, 2, function(cnts) { - exp(locfunc((log(cnts) - loggeomeans)[is.finite(loggeomeans) & cnts > 0])) - }) - } else { - if ( !( is.numeric(controlGenes) | is.logical(controlGenes) ) ) { - stop("controlGenes should be either a numeric or logical vector") - } - loggeomeansSub <- loggeomeans[controlGenes] - apply(counts[controlGenes,,drop=FALSE], 2, function(cnts) { - exp(locfunc((log(cnts) - loggeomeansSub)[is.finite(loggeomeansSub) & cnts > 0])) - }) - } - sf -} - -xtailDispersionFit <- function(rawmeans, rawdisps, quantilePercent=0.35, binnum=50, minDisp=1e-8){ - useForFit <- rawdisps > 100 * minDisp - if(sum(useForFit) == 0){ - return (rawdisps) - stop("all gene-wise dispersion estimates are within 2 orders of magnitude - from the minimum value, and so the gene-wise estimates as final estimates.") - } - usedMeans <- rawmeans[useForFit] - usedDisps <- rawdisps[useForFit] - sortMeans <- usedMeans[order(usedMeans)] - sortDisps <- usedDisps[order(usedMeans)] - genenum <- length(sortMeans) - quantile_means <- c() - quantile_disps <- c() - for(i in seq(1,genenum,binnum)){ - num = min(binnum,genenum-i+1) - if(num<(binnum)){ - next - } - curbin_means <- sortMeans[i:(i+num-1)] - curbin_disps <- sortDisps[i:(i+num-1)] - - m <- curbin_means[order(curbin_disps)][1:floor(num*quantilePercent)] - d <- curbin_disps[order(curbin_disps)][1:floor(num*quantilePercent)] - quantile_means <- c(quantile_means, m) - quantile_disps <- c(quantile_disps, d) - } - dat <- data.frame(logDisps = log(quantile_disps), logMeans=log(quantile_means)) - fit <- locfit(logDisps~logMeans, data=dat[quantile_disps>=minDisp*10,,drop=FALSE]) - dispFit <- function(rawmeans)exp(predict(fit,data.frame(logMeans=log(rawmeans)))) - dispFit -} \ No newline at end of file diff --git a/R/RcppExports.R b/R/RcppExports.R index 5dabf17..8b93038 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -1,27 +1,27 @@ -# This file was generated by Rcpp::compileAttributes +# Generated by using Rcpp::compileAttributes() -> do not edit by hand # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 fitBeta2 <- function(ySEXP, xSEXP, nfSEXP, alpha_hatSEXP, contrastSEXP, beta_matSEXP, lambdaSEXP, tolSEXP, maxitSEXP, useQRSEXP) { - .Call('xtail_fitBeta2', PACKAGE = 'xtail', ySEXP, xSEXP, nfSEXP, alpha_hatSEXP, contrastSEXP, beta_matSEXP, lambdaSEXP, tolSEXP, maxitSEXP, useQRSEXP) + .Call('_xtail_fitBeta2', PACKAGE = 'xtail', ySEXP, xSEXP, nfSEXP, alpha_hatSEXP, contrastSEXP, beta_matSEXP, lambdaSEXP, tolSEXP, maxitSEXP, useQRSEXP) } probDensity <- function(k, alpha, intercept, sf, betas, cond) { - .Call('xtail_probDensity', PACKAGE = 'xtail', k, alpha, intercept, sf, betas, cond) + .Call('_xtail_probDensity', PACKAGE = 'xtail', k, alpha, intercept, sf, betas, cond) } probMatrix <- function(x, y, side) { - .Call('xtail_probMatrix', PACKAGE = 'xtail', x, y, side) + .Call('_xtail_probMatrix', PACKAGE = 'xtail', x, y, side) } probMatrixCI <- function(x, y, side, ci) { - .Call('xtail_probMatrixCI', PACKAGE = 'xtail', x, y, side, ci) + .Call('_xtail_probMatrixCI', PACKAGE = 'xtail', x, y, side, ci) } boundFC <- function(k1, alpha1, intercept1, sf1, FC1, cond1, k2, alpha2, intercept2, sf2, FC2, cond2, stepFC, toleration) { - .Call('xtail_boundFC', PACKAGE = 'xtail', k1, alpha1, intercept1, sf1, FC1, cond1, k2, alpha2, intercept2, sf2, FC2, cond2, stepFC, toleration) + .Call('_xtail_boundFC', PACKAGE = 'xtail', k1, alpha1, intercept1, sf1, FC1, cond1, k2, alpha2, intercept2, sf2, FC2, cond2, stepFC, toleration) } xtail_test <- function(k1, k2, ints1, ints2, log2FC_1, log2FC_2, disper1, disper2, sf1, sf2, cond1, cond2, bin, ci) { - .Call('xtail_xtail_test', PACKAGE = 'xtail', k1, k2, ints1, ints2, log2FC_1, log2FC_2, disper1, disper2, sf1, sf2, cond1, cond2, bin, ci) + .Call('_xtail_xtail_test', PACKAGE = 'xtail', k1, k2, ints1, ints2, log2FC_1, log2FC_2, disper1, disper2, sf1, sf2, cond1, cond2, bin, ci) } diff --git a/R/estimate-functions.R b/R/estimate-functions.R new file mode 100644 index 0000000..bc8fe69 --- /dev/null +++ b/R/estimate-functions.R @@ -0,0 +1,520 @@ +######################################################### +# +# These functions are copied and modify from DESeq2 package +# +######################################################### + +## Xtail relies on DESeq2 to estimate coefficient of GLM model + +#' @importFrom stats relevel formula terms +#' @importFrom S4Vectors mcols mcols<- +#' @importFrom SummarizedExperiment assay assay<- +.estimate_MLE_for_Beta <- function(object, + modelMatrix = NULL, + modelMatrixType = "standard", + maxit = 100, + useOptim = TRUE, + quiet = FALSE, + useQR = TRUE) { + stopifnot(length(maxit)==1) + + # in case the class of the mcols(mcols(object)) are not character + object <- .sanitize_rowData(object) + + if (is.null(mcols(object)$allZero)) { + object <- .get_base_means_and_variances(object) + } + + # only continue on the rows with non-zero row mean + objectNZ <- object[!mcols(object)$allZero,,drop=FALSE] + + if (is.null(modelMatrix)) { + modelAsFormula <- TRUE + termsOrder <- attr(terms(design(object)),"order") + interactionPresent <- any(termsOrder > 1) + blindDesign <- design(object) == formula(~ 1) + + # store modelMatrixType so it can be accessed by estimateBetaPriorVar + attr(object, "modelMatrixType") <- modelMatrixType + hasIntercept <- attr(terms(design(object)),"intercept") == 1 + renameCols <- hasIntercept + } else { + message("using supplied model matrix") + modelAsFormula <- FALSE + attr(object, "modelMatrixType") <- "user-supplied" + renameCols <- FALSE + } + + + # fit the negative binomial GLM without a prior + # (in actuality a very wide prior with standard deviation 1e3 on log2 fold changes) + fit <- .fit_Nbinom_GLMs(objectNZ, + maxit = maxit, + useOptim = useOptim, + useQR = useQR, + renameCols = renameCols, + modelMatrix = modelMatrix) + H <- fit$hat_diagonals + modelMatrix <- fit$modelMatrix + modelMatrixNames <- fit$modelMatrixNames + # record the wide prior variance which was used in fitting + betaPriorVar <- rep(1e6, ncol(fit$modelMatrix)) + + + # store mu in case the user did not call estimateDispersionsGeneEst + dimnames(fit$mu) <- NULL + assay(objectNZ,"mu",withDimnames = FALSE) <- fit$mu + assay(object,"mu",withDimnames = FALSE) <- .build_matrix_with_NA_rows(fit$mu, mcols(object)$allZero) + + # store the prior variance directly as an attribute + # of the DESeqDataSet object, so it can be pulled later by + # the results function (necessary for setting max Cook's distance) + attr(object,"betaPrior") <- FALSE + attr(object,"betaPriorVar") <- betaPriorVar + attr(object,"modelMatrix") <- modelMatrix + + # add betas to the object + modelMatrixNames <- colnames(modelMatrix) + betaMatrix <- fit$betaMatrix + colnames(betaMatrix) <- modelMatrixNames + betaSE <- fit$betaSE + colnames(betaSE) <- paste0("SE_",modelMatrixNames) + betaConv <- fit$betaConv + + if (any(!betaConv)) { + if (!quiet) message(paste(sum(!betaConv),"rows did not converge in beta, labelled in mcols(object)$betaConv. Use larger maxit argument")) + } + + + resultsList <- c(.matrix_to_list(betaMatrix), + .matrix_to_list(betaSE), + list(betaConv = betaConv, + betaIter = fit$betaIter, + deviance = -2 * fit$logLike)) + + Results <- .build_DataFrame_with_NA_rows(resultsList, mcols(object)$allZero) + + modelMatrixNamesSpaces <- gsub("_"," ",modelMatrixNames) + + lfcType <- "MLE" + coefInfo <- paste(paste0("log2 fold change (",lfcType,"):"),modelMatrixNamesSpaces) + seInfo <- paste("standard error:",modelMatrixNamesSpaces) + + mcols(Results) <- DataFrame(type = rep("results",ncol(Results)), + description = c(coefInfo, seInfo, + "convergence of betas", + "iterations for betas", + "deviance for the fitted model")) + + mcols(object) <- cbind(mcols(object),Results) + return(object) +} + + + + +# Unexported, low-level function for fitting negative binomial GLMs +# +# Users typically call \code{\link{nbinomWaldTest}} or \code{\link{nbinomLRT}} +# which calls this function to perform fitting. These functions return +# a \code{\link{DESeqDataSet}} object with the appropriate columns +# added. This function returns results as a list. +# +# object a DESeqDataSet +# modelMatrix the design matrix +# modelFormula a formula specifying how to construct the design matrix +# alpha_hat the dispersion parameter estimates +# lambda the 'ridge' term added for the penalized GLM on the log2 scale +# renameCols whether to give columns variable_B_vs_A style names +# betaTol control parameter: stop when the following is satisfied: +# abs(dev - dev_old)/(abs(dev) + 0.1) < betaTol +# maxit control parameter: maximum number of iteration to allow for +# convergence +# useOptim whether to use optim on rows which have not converged: +# Fisher scoring is not ideal with multiple groups and sparse +# count distributions +# useQR whether to use the QR decomposition on the design matrix X +# forceOptim whether to use optim on all rows +# warnNonposVar whether to warn about non positive variances, +# for advanced users only running LRT without beta prior, +# this might be desirable to be ignored. +# +# return a list of results, with coefficients and standard +# errors on the log2 scale +#' @importFrom stats model.matrix formula dnbinom +#' @importFrom S4Vectors mcols +#' @importFrom SummarizedExperiment colData assay +.fit_Nbinom_GLMs <- function(object, + modelMatrix=NULL, + modelFormula, + alpha_hat, + lambda, + renameCols = TRUE, + betaTol = 1e-8, + maxit = 100L, + useOptim = TRUE, + useQR = TRUE, + forceOptim = FALSE, + warnNonposVar = TRUE) { + if (missing(modelFormula)) { + modelFormula <- design(object) + } + if (is.null(modelMatrix)) { + modelAsFormula <- TRUE + modelMatrix <- model.matrix(modelFormula, data=colData(object)) + } else { + modelAsFormula <- FALSE + } + + stopifnot(all(colSums(abs(modelMatrix)) > 0)) + + # rename columns, for use as columns in DataFrame + # and to emphasize the reference level comparison + modelMatrixNames <- colnames(modelMatrix) + modelMatrixNames[modelMatrixNames == "(Intercept)"] <- "Intercept" + modelMatrixNames <- make.names(modelMatrixNames) + + if (renameCols) { + convertNames <- .rename_model_matrix_columns(colData(object), + modelFormula) + convertNames <- convertNames[convertNames$from %in% modelMatrixNames,,drop=FALSE] + modelMatrixNames[match(convertNames$from, modelMatrixNames)] <- convertNames$to + } + colnames(modelMatrix) <- modelMatrixNames + + normalizationFactors <- if (!is.null(normalizationFactors(object))) { + normalizationFactors(object) + } else { + matrix(rep(sizeFactors(object),each=nrow(object)), + ncol=ncol(object)) + } + + if (missing(alpha_hat)) { + alpha_hat <- assay(object, XTAIL_DISPERSION_ASSAY) + } + + if (nrow(alpha_hat) != nrow(object)) { + stop("alpha_hat needs to be the same length as nrows(object)") + } + + # set a wide prior for all coefficients + if (missing(lambda)) { + lambda <- rep(1e-6, ncol(modelMatrix)) + } + + # bypass the beta fitting if the model formula is only intercept and + # the prior variance is large (1e6) + # i.e., LRT with reduced ~ 1 and no beta prior + justIntercept <- if (modelAsFormula) { + modelFormula == formula(~ 1) + } else { + ncol(modelMatrix) == 1 & all(modelMatrix == 1) + } + if (justIntercept & all(lambda <= 1e-6)) { + alpha <- alpha_hat + betaConv <- rep(TRUE, nrow(object)) + betaIter <- rep(1,nrow(object)) + betaMatrix <- matrix(log2(mcols(object)$baseMean),ncol=1) + mu <- normalizationFactors * as.numeric(2^betaMatrix) + logLike <- rowSums(dnbinom(counts(object), mu=mu, size=1/alpha, log=TRUE)) + deviance <- -2 * logLike + modelMatrix <- model.matrix(~ 1, colData(object)) + colnames(modelMatrix) <- modelMatrixNames <- "Intercept" + w <- (mu^-1 + alpha)^-1 + xtwx <- rowSums(w) + sigma <- xtwx^-1 + betaSE <- matrix(log2(exp(1)) * sqrt(sigma),ncol=1) + hat_diagonals <- w * xtwx^-1; + res <- list(logLike = logLike, betaConv = betaConv, betaMatrix = betaMatrix, + betaSE = betaSE, mu = mu, betaIter = betaIter, + deviance = deviance, + modelMatrix=modelMatrix, + nterms=1, hat_diagonals=hat_diagonals) + return(res) + } + + qrx <- qr(modelMatrix) + # if full rank, estimate initial betas for IRLS below + if (qrx$rank == ncol(modelMatrix)) { + Q <- qr.Q(qrx) + R <- qr.R(qrx) + y <- t(log(counts(object,normalized=TRUE) + .1)) + beta_mat <- t(solve(R, t(Q) %*% y)) + } else { + if ("Intercept" %in% modelMatrixNames) { + beta_mat <- matrix(0, ncol=ncol(modelMatrix), nrow=nrow(object)) + # use the natural log as fitBeta occurs in the natural log scale + logBaseMean <- log(rowMeans(counts(object,normalized=TRUE))) + beta_mat[,which(modelMatrixNames == "Intercept")] <- logBaseMean + } else { + beta_mat <- matrix(1, ncol=ncol(modelMatrix), nrow=nrow(object)) + } + } + + # here we convert from the log2 scale of the betas + # and the beta prior variance to the log scale + # used in fitBeta. + # so we divide by the square of the + # conversion factor, log(2) + lambdaLogScale <- lambda / log(2)^2 + + betaRes <- .fit_Beta(ySEXP = counts(object), + xSEXP = modelMatrix, + nfSEXP = normalizationFactors, + alpha_hatSEXP = alpha_hat, + beta_matSEXP = beta_mat, + lambdaSEXP = lambdaLogScale, + tolSEXP = betaTol, + maxitSEXP = maxit, + useQRSEXP = useQR) + mu <- normalizationFactors * t(exp(modelMatrix %*% t(betaRes$beta_mat))) + dispersionMatrix <- assay(object, XTAIL_DISPERSION_ASSAY) + logLike <- .get_nbinom_log_like(counts(object), + mu, + assay(object, XTAIL_DISPERSION_ASSAY)) + + # test for stability + rowStable <- apply(betaRes$beta_mat,1,function(row) sum(is.na(row))) == 0 + + # test for positive variances + rowVarPositive <- apply(betaRes$beta_var_mat,1,function(row) sum(row <= 0)) == 0 + + # test for convergence, stability and positive variances + betaConv <- betaRes$iter < maxit + + # here we transform the betaMatrix and betaSE to a log2 scale + betaMatrix <- log2(exp(1))*betaRes$beta_mat + colnames(betaMatrix) <- modelMatrixNames + colnames(modelMatrix) <- modelMatrixNames + # warn below regarding these rows with negative variance + betaSE <- log2(exp(1))*sqrt(pmax(betaRes$beta_var_mat,0)) + colnames(betaSE) <- paste0("SE_",modelMatrixNames) + + # switch based on whether we should also use optim + # on rows which did not converge + rowsForOptim <- if (useOptim) { + which(!betaConv | !rowStable | !rowVarPositive) + } else { + which(!rowStable | !rowVarPositive) + } + + if (forceOptim) { + rowsForOptim <- seq_along(betaConv) + } + + if (length(rowsForOptim) > 0) { + # we use optim if didn't reach convergence with the IRLS code + resOptim <- .fit_Nbinom_GLMs_Optim(object, + modelMatrix, + lambda, + rowsForOptim, + rowStable, + normalizationFactors, + alpha_hat, + betaMatrix, + betaSE, + betaConv, + beta_mat, + mu, + logLike) + betaMatrix <- resOptim$betaMatrix + betaSE <- resOptim$betaSE + betaConv <- resOptim$betaConv + mu <- resOptim$mu + logLike <- resOptim$logLike + } + + stopifnot(!any(is.na(betaSE))) + nNonposVar <- sum(rowSums(betaSE == 0) > 0) + if (warnNonposVar & nNonposVar > 0) warning(nNonposVar,"rows had non-positive estimates of variance for coefficients") + + list(logLike = logLike, betaConv = betaConv, betaMatrix = betaMatrix, + betaSE = betaSE, mu = mu, betaIter = betaRes$iter, + deviance = betaRes$deviance, + modelMatrix=modelMatrix, + nterms=ncol(modelMatrix), hat_diagonals=betaRes$hat_diagonals) +} + + + +# breaking out the optim backup code from fitNbinomGLMs +#' @importFrom stats dnbinom dnorm optim +.fit_Nbinom_GLMs_Optim <- function(object, + modelMatrix, + lambda, + rowsForOptim, + rowStable, + normalizationFactors, + alpha_hat, + betaMatrix, + betaSE, + betaConv, + beta_mat, + mu, + logLike) { + scaleCols <- apply(modelMatrix,2,function(z) max(abs(z))) + stopifnot(all(scaleCols > 0)) + x <- sweep(modelMatrix,2,scaleCols,"/") + lambdaColScale <- lambda / scaleCols^2 + lambdaColScale <- ifelse(lambdaColScale == 0, 1e-6, lambdaColScale) + lambdaLogScale <- lambda / log(2)^2 + lambdaLogScaleColScale <- lambdaLogScale / scaleCols^2 + large <- 30 + for (row in rowsForOptim) { + betaRow <- if (rowStable[row] & all(abs(betaMatrix[row,]) < large)) { + betaMatrix[row,] * scaleCols + } else { + beta_mat[row,] * scaleCols + } + nf <- normalizationFactors[row,] + k <- counts(object)[row,] + alpha <- alpha_hat[row] + objectiveFn <- function(p) { + mu_row <- as.numeric(nf * 2^(x %*% p)) + logLike <- sum(dnbinom(k,mu=mu_row,size=1/alpha,log=TRUE)) + logPrior <- sum(dnorm(p,0,sqrt(1/lambdaColScale),log=TRUE)) + negLogPost <- -1 * (logLike + logPrior) + if (is.finite(negLogPost)) negLogPost else 10^300 + } + o <- optim(betaRow, objectiveFn, method="L-BFGS-B",lower=-large, upper=large) + ridge <- if (length(lambdaLogScale) > 1) { + diag(lambdaLogScaleColScale) + } else { + as.matrix(lambdaLogScaleColScale,ncol=1) + } + # if we converged, change betaConv to TRUE + if (o$convergence == 0) { + betaConv[row] <- TRUE + } + # with or without convergence, store the estimate from optim + betaMatrix[row,] <- o$par / scaleCols + # calculate the standard errors + mu_row <- as.numeric(nf * 2^(x %*% o$par)) + w <- diag((mu_row^-1 + alpha)^-1) + xtwx <- t(x) %*% w %*% x + xtwxRidgeInv <- solve(xtwx + ridge) + sigma <- xtwxRidgeInv %*% xtwx %*% xtwxRidgeInv + # warn below regarding these rows with negative variance + betaSE[row,] <- log2(exp(1)) * sqrt(pmax(diag(sigma),0)) / scaleCols + # store the new mu vector + mu[row,] <- mu_row + logLike[row] <- sum(dnbinom(k, mu=mu_row, size=1/alpha, log=TRUE)) + } + return(list(betaMatrix=betaMatrix,betaSE=betaSE, + betaConv=betaConv, + mu=mu,logLike=logLike)) +} + +### + +# Get base means and variances +# +# An internally used function to calculate the row means and variances +# from the normalized counts, which requires that \code{\link{estimateSizeFactors}} +# has already been called. Adds these and a logical column if the row sums +# are zero to the mcols of the object. +# +# object a DESeqDataSet object +# +# return a DESeqDataSet object with columns baseMean +# and baseVar in the row metadata columns +#' @importFrom matrixStats rowVars +#' @importFrom S4Vectors mcols mcols<- +.get_base_means_and_variances <- function(object) { + meanVarZero <- DataFrame(baseMean = unname(rowMeans(counts(object,normalized=TRUE))), + baseVar = unname(rowVars(counts(object,normalized=TRUE))), + allZero = unname(rowSums(counts(object)) == 0)) + mcols(meanVarZero) <- DataFrame(type = rep("intermediate",ncol(meanVarZero)), + description = c("mean of normalized counts for all samples", + "variance of normalized counts for all samples", + "all counts for a gene are zero")) + if (all(c("baseMean","baseVar","allZero") %in% names(mcols(object)))) { + mcols(object)[c("baseMean","baseVar","allZero")] <- meanVarZero + } else { + mcols(object) <- cbind(mcols(object),meanVarZero) + } + return(object) +} + + + + + + +# convenience function for testing the log likelihood +# for a count matrix, mu matrix and vector disp +#' @importFrom stats dnbinom +.get_nbinom_log_like <- function(counts, mu, disp) { + nbd <- dnbinom(counts, + mu = mu, + size = 1/disp, + log = TRUE) + mat <- matrix(nbd, ncol = ncol(counts)) + rowSums(mat) +} + +#' @importFrom S4Vectors mcols mcols<- +.sanitize_rowData <- function(object) { + mc <- mcols(mcols(object)) + if (is.null(mc)) { + mc <- DataFrame(type = rep("input",ncol(mcols(object))), + description = character(ncol(mcols(object)))) + } else { + mc$type <- as.character(mc$type) + mc$description <- as.character(mc$description) + mc$type[is.na(mc$type)] <- "" + mc$description[is.na(mc$description)] <- "" + } + mcols(mcols(object)) <- mc + object +} + + +# convenience function for building larger matrices +# by filling in NA rows +.build_matrix_with_NA_rows <- function(m, NARows) { + mFull <- matrix(NA, ncol=ncol(m), nrow=length(NARows)) + mFull[!NARows,] <- m + mFull +} + +# convenience function for building results tables +# out of a list and filling in NA rows +.build_DataFrame_with_NA_rows <- function(resultsList, NArows) { + lengths <- vapply(resultsList,length,integer(1)) + if (!all(lengths == lengths[1])) { + stop("lengths of vectors in resultsList must be equal") + } + if (sum(!NArows) != lengths[1]) { + stop("number of non-NA rows must be equal to lengths of vectors in resultsList") + } + if (sum(NArows) == 0) { + return(DataFrame(resultsList)) + } + dfFull <- DataFrame(lapply(resultsList, function(x) vector(mode(x), length(NArows)))) + dfFull[NArows,] <- NA + dfFull[!NArows,] <- DataFrame(resultsList) + dfFull +} + + +# convenience function for breaking up matrices +# by column and preserving column names +.matrix_to_list <- function(m) { + l <- split(m, col(m)) + names(l) <- colnames(m) + l +} + +# convenience function to make more descriptive names +# for factor variables +.rename_model_matrix_columns <- function(data, design) { + data <- as.data.frame(data) + designVars <- all.vars(design) + designVarsClass <- vapply(designVars, function(v) class(data[[v]]), + character(1)) + factorVars <- designVars[designVarsClass == "factor"] + colNamesFrom <- make.names(do.call(c,lapply(factorVars, function(v) paste0(v,levels(data[[v]])[-1])))) + colNamesTo <- make.names(do.call(c,lapply(factorVars, function(v) paste0(v,"_",levels(data[[v]])[-1],"_vs_",levels(data[[v]])[1])))) + data.frame(from=colNamesFrom,to=colNamesTo,stringsAsFactors=FALSE) +} diff --git a/R/methods.R b/R/methods.R deleted file mode 100755 index a5e0af7..0000000 --- a/R/methods.R +++ /dev/null @@ -1,184 +0,0 @@ -dispersionMatrix.DESeqDataSet <- function(object){ - if (!"dispersionMatrix" %in% names(assays(object))) return (NULL) - disp <- assays(object)[["dispersionMatrix"]] - colnames(disp) <- colnames(object) - disp -} - -#' @export -setMethod("dispersionMatrix", signature(object="DESeqDataSet"), - dispersionMatrix.DESeqDataSet) - -#' @name dispersions -#' @rdname dispersions -#' @exportMethod "dispersions<-" -setReplaceMethod("dispersionMatrix", signature(object="DESeqDataSet", value="matrix"), - function(object, value) { - assays(object)[["dispersionMatrix"]] <- value - validObject( object ) - object - }) - -#' Summarize xtail results -#' Print a summary of the results of xtail -#' -#' @usage -#' \method{summary}{xtailResults}(object,alpha) -#' @param object a \code{xtailResults} obect -#' @param alpha the adjusted p-value cutoff. If not set, -#' the default is 0.1. -#' -#' @docType methods -#' @name summary -#' @rdname summary -#' @aliases summary summary.xtailResults -#' -#' @export -summary.xtailResults <- function(object,alpha){ - if(is.null(object$resultsTable)) stop("error, the object must be created by using the xtail function.") - if(!is(object, "xtailResults")) stop("error, the object must be created by using the xtail function.") - if (missing(alpha)){ - alpha <- 0.1 - } - resultsTable <- object$resultsTable - cat("\n") - all_num <- nrow(resultsTable) - up_num <- sum(resultsTable$log2FC_TE_final > 0 & resultsTable$pvalue.adjust < alpha, na.rm=TRUE) - down_num <- sum(resultsTable$log2FC_TE_final < 0 & resultsTable$pvalue.adjust < alpha, na.rm=TRUE) - - cat("The total number of gene is:", all_num,"\n") - cat("Number of the log2FC and log2R used in determining the final p-value:\n") - cat(" log2FC:", object$log2FC_determine_num,"\n") - cat(" log2R :", object$log2R_determine_num,"\n") - cat("\n") - cat("adjusted pvalue < ", alpha, "\n") - cat(" log2FC_TE > 0 (up) :", up_num, "\n") - cat(" log2FC_TE < 0 (down):", down_num,"\n") - cat("\n") -} - -resultsTable.xtailResults <- function(object,sort.by="pvalue.adjust", log2FCs = FALSE, log2Rs = FALSE){ - #check object - if(!is(object, "xtailResults")) stop("error, the object must be created by using the xtail function.") - if(is.null(object$resultsTable)) stop("error, the object must be created by using the xtail function.") - - if (missing(sort.by)){ - tab <-object$resultsTable - }else{ - #check sort.by - sort.by <- match.arg(sort.by, c("log2FC_TE_v1", "log2FC_TE_v2", "log2FC_TE_final" ,"pvalue_v1" , "pvalue_v2", "pvalue_final","pvalue.adjust")) - - #absolute TE fold change. - if(is.element(sort.by, c("log2FC_TE_v1","log2FC_TE_v2","log2FC_TE_final"))){ - tefc <- abs(object$resultsTable[[sort.by]]) - }else{ - tefc <- abs(object$resultsTable[["log2FC_TE_final"]]) - } - - #pvalue order - if(is.element(sort.by, c("pvalue_v1","pvalue_v2","pvalue_final", "pvalue.adjust"))) pvfc <- object$resultsTable[[sort.by]] - - # out - o <- switch(sort.by, - "log2FC_TE_v1" = order(tefc, decreasing=TRUE), - "log2FC_TE_v2" = order(tefc, decreasing=TRUE), - "log2FC_TE_final" = order(tefc, decreasing=TRUE), - "pvalue_v1" = order(pvfc, -tefc), - "pvalue_v2" = order(pvfc, -tefc), - "pvalue_final" = order(pvfc, -tefc), - "pvalue.adjust" = order(pvfc, -tefc) - ) - tab <- object$resultsTable[o,] - } - #remove redundant columns - if (!log2Rs){ - condition1 <- paste0(object$condition1, "_log2TE") - condition2 <- paste0(object$condition2, "_log2TE") - tab[[condition1]] <- NULL - tab[[condition2]] <- NULL - } - if (!log2FCs){ - tab$mRNA_log2FC <- NULL - tab$RPF_log2FC <- NULL - } - tab - -} - -#' Results table of xtail results -#' Print the results of xtail -#' -#' @usage -#' \method{resultsTable}{xtailResults}(object, sort.by) -#' @param object a \code{xtailResults} obect -#' @param sort.by the sorted column. By default, the pvalue.adjust is used. -#' @docType methods -#' @name resultsTable -#' @rdname resultsTable -#' @aliases resultsTable resultsTable.xtailResults -#' -#' @export -setMethod("resultsTable", signature(object="xtailResults"),resultsTable.xtailResults) - - -#' write xtail results as table -#' -#' @usage -#' \method{write.xtail}{xtailResults}(object) -#' @param object a \code{xtailResults} obect -#' -#' @docType methods -#' @name write.xtail -#' @rdname write.xtail -#' @aliases write.xtail write.table -#' -#' @export -#' -write.xtail <- function(object, ..., sort.by="pvalue.adjust", log2FCs = FALSE, log2Rs = FALSE){ - #check object - if(!is(object, "xtailResults")) stop("error, the object must be created by using the xtail function.") - if(is.null(object$resultsTable)) stop("error, the object must be created by using the xtail function.") - - if (missing(sort.by)){ - tab <- object$resultsTable - }else{ - #check sort.by - sort.by <- match.arg(sort.by, c("log2FC_TE_v1", "log2FC_TE_v2" ,"log2FC_TE_final", "pvalue_v1" , "pvalue_v2", "pvalue_final","pvalue.adjust")) - - #absolute TE fold change. - if(is.element(sort.by, c("log2FC_TE_v1","log2FC_TE_v2","log2FC_TE_final"))){ - tefc <- abs(object$resultsTable[[sort.by]]) - }else{ - tefc <- abs(object$resultsTable[["log2FC_TE_final"]]) - } - - #pvalue order - if(is.element(sort.by, c("pvalue_v1","pvalue_v2","pvalue_final", "pvalue.adjust"))) pvfc <- object$resultsTable[[sort.by]] - - # out - o <- switch(sort.by, - "log2FC_TE_v1" = order(tefc, decreasing=TRUE), - "log2FC_TE_v2" = order(tefc, decreasing=TRUE), - "log2FC_TE_final" = order(tefc, decreasing=TRUE), - "pvalue_v1" = order(pvfc, -tefc), - "pvalue_v2" = order(pvfc, -tefc), - "pvalue_final" = order(pvfc, -tefc), - "pvalue.adjust" = order(pvfc, -tefc) - ) - tab <- object$resultsTable[o,] - } - - #remove redundant columns - if (!log2Rs){ - condition1 <- paste0(object$condition1, "_log2TE") - condition2 <- paste0(object$condition2, "_log2TE") - tab[[condition1]] <- NULL - tab[[condition2]] <- NULL - } - if (!log2FCs){ - tab$mRNA_log2FC <- NULL - tab$RPF_log2FC <- NULL - } - write.table(tab, ...) -} - diff --git a/R/plot-fc.R b/R/plot-fc.R new file mode 100644 index 0000000..fbe9e78 --- /dev/null +++ b/R/plot-fc.R @@ -0,0 +1,79 @@ +#' Scatterplot of fold changes. +#' +#' A simple function that plots the fold change in mRNA and RPF. +#' Different classes of genes are labeled with different of color. +#' Each dot, representing a particular gene, are color coded by the category of +#' expression change. +#' +#' @param object a \code{xtail} object +#' +#' @param log2FC.cutoff cutoff for categorizing results +#' +#' @param xlim argument for limiting the range plotted on the x axis. See +#' \code{\link[ggplot2:scale_continuous]{scale_continuous}} for more +#' details. +#' +#' @param ylim argument for limiting the range plotted on the y axis. See +#' \code{\link[ggplot2:scale_continuous]{scale_continuous}} for more +#' details. +#' +#' @param ... optional arguments. Currently not used +#' +#' @return a \code{ggplot} object +#' +#' @name plotFCs +#' +#' @examples +#' data(xtailres) +#' plotFCs(xtailres) +NULL + +#' @rdname plotFCs +#' @export +setGeneric("plotFCs", + function(object,...) + standardGeneric("plotFCs")) + +#' @rdname plotFCs +#' +#' @import ggplot2 +#' @importFrom stats complete.cases +#' +#' @export +setMethod("plotFCs", signature = "xtail", + function(object, + log2FC.cutoff = 1, + xlim = NULL, + ylim = NULL){ + # input check + if(!is.numeric(log2FC.cutoff) || + length(log2FC.cutoff) != 1L || + log2FC.cutoff <= 0 ) { + stop("log2FC.cutoff must be a single numeric value and be larger ", + "than 0.", call. = FALSE) + } + if(!is.null(xlim)){ + if(!is.numeric(xlim) || length(xlim) != 2L) { + stop("invalid xlim value, should have two numeric elements.", + call. = FALSE) + } + } + if(!is.null(ylim)){ + if(!is.numeric(ylim) || length(ylim) != 2L) { + stop("invalid ylim value, should have two numeric elements.", + call. = FALSE) + } + } + # + table <- resultsTable(object, sort.by = "pvalue_final", log2FCs = TRUE) + table <- table[complete.cases(as.data.frame(table)),] + categories <- c("transcription only" = "#0D5FFF", + "translation only" = "#FF2E06", + "homodirectional" = "#75E805", + "opposite change" = "#FFDE13", + "stable" = "gray90") + x <- "mRNA_log2FC" + y <- "RPF_log2FC" + .plot_scatter(table, x, y, categories, log2FC.cutoff, xlim, ylim) + } +) diff --git a/R/plot-r.R b/R/plot-r.R new file mode 100644 index 0000000..ce2ff06 --- /dev/null +++ b/R/plot-r.R @@ -0,0 +1,81 @@ +#' Scatterplot of log2 RPF-to-mRNA ratios. +#' +#' A simple function that plots the log2 RPF-to-mRNA ratios in two conditions. +#' Different classes of genes are labeled with different of color. +#' Each dot, representing a particular gene, are color coded by the category of +#' expression change. +#' +#' @param object a \code{xtail} object +#' +#' @param log2R.cutoff cutoff for categorizing results +#' +#' @param xlim argument for limiting the range plotted on the x axis. See +#' \code{\link[ggplot2:scale_continuous]{scale_continuous}} for more +#' details. +#' +#' @param ylim argument for limiting the range plotted on the y axis. See +#' \code{\link[ggplot2:scale_continuous]{scale_continuous}} for more +#' details. +#' +#' @param ... optional arguments. Currently not used +#' +#' @return a \code{ggplot} object +#' +#' @name plotRs +#' +#' @examples +#' data(xtailres) +#' plotRs(xtailres) +NULL + +#' @rdname plotRs +#' @export +setGeneric("plotRs", + function(object,...) + standardGeneric("plotRs")) + +#' @rdname plotRs +#' +#' @import ggplot2 +#' @importFrom stats complete.cases +#' +#' @export +setMethod("plotRs", signature = "xtail", + function(object, + log2R.cutoff = 1, + xlim = NULL, + ylim = NULL){ + # input check + if(!is.numeric(log2R.cutoff) || + length(log2R.cutoff) != 1L || + log2R.cutoff <= 0 ) { + stop("log2R.cutoff must be a single numeric value and be larger ", + "than 0.", call. = FALSE) + } + if(!is.null(xlim)){ + if(!is.numeric(xlim) || length(xlim) != 2L) { + stop("invalid xlim value, should have two numeric elements.", + call. = FALSE) + } + } + if(!is.null(ylim)){ + if(!is.numeric(ylim) || length(ylim) != 2L) { + stop("invalid ylim value, should have two numeric elements.", + call. = FALSE) + } + } + # + table <- resultsTable(object, sort.by = "pvalue_final", log2Rs = TRUE) + table <- table[complete.cases(as.data.frame(table)),] + cond <- paste0(conditions(object),"_log2TE") + categories <- c("condition1" = "#0D5FFF", + "condition2" = "#FF2E06", + "homodirectional" = "#75E805", + "opposite change" = "#FFDE13", + "stable" = "gray90") + names(categories)[seq_len(2L)] <- paste0(conditions(object), " only") + x <- cond[1L] + y <- cond[2L] + .plot_scatter(table, x, y, categories, log2R.cutoff, xlim, ylim) + } +) diff --git a/R/plot-utils.R b/R/plot-utils.R new file mode 100644 index 0000000..2efa58d --- /dev/null +++ b/R/plot-utils.R @@ -0,0 +1,37 @@ +.add_category_label <- function(data, x_cat, y_cat, cutoff, categories){ + data$Category <- categories[5L] + x_only <- abs(data[,x_cat]) >= cutoff & + abs(data[,y_cat]) < cutoff + data$Category[x_only] <- categories[1L] + y_only <- abs(data[,y_cat]) >= cutoff & + abs(data[,x_cat]) < cutoff + data$Category[y_only] <- categories[2L] + x_and_y <- abs(data[,x_cat]) >= cutoff & + abs(data[,y_cat]) >= cutoff + data$Category[x_and_y] <- categories[3L] + x_opposite_y <- abs(data[,x_cat]) >= cutoff & + abs(data[,y_cat]) >= cutoff & + sign(data[,x_cat]) != sign(data[,y_cat]) + data$Category[x_opposite_y] <- categories[4L] + data +} + +.plot_scatter <- function(data, x, y, categories, cutoff, xlim, ylim){ + data <- .add_category_label(data, + x_cat = x, + y_cat = y, + cutoff = cutoff, + categories = names(categories)) + plot_out <- ggplot(as.data.frame(data), + aes_string(x = x, y = y)) + + geom_hline(yintercept = cutoff, colour = "gray") + + geom_hline(yintercept = -cutoff, colour = "gray") + + geom_vline(xintercept = cutoff, colour = "gray") + + geom_vline(xintercept = -cutoff, colour = "gray") + + geom_point(aes_string(colour = "Category"), shape = 19L, alpha = 0.65) + + scale_x_continuous(limits = xlim) + + scale_y_continuous(limits = ylim) + + scale_colour_manual(values = categories) + + theme_bw() + plot_out +} diff --git a/R/plot-volcano.R b/R/plot-volcano.R new file mode 100644 index 0000000..bf03628 --- /dev/null +++ b/R/plot-volcano.R @@ -0,0 +1,38 @@ +#' Volcano plot +#' +#' A simple function that plots log2 fold change of TE and -log10 of pvalues +#' The genes are color coded by P-value (-log10) +#' +#' @param object a \code{xtail} object +#' +#' @param ... optional arguments. Currently not used +#' +#' @return a \code{ggplot} object +#' +#' @name volcanoPlot +#' +#' @examples +#' data(xtailres) +#' volcanoPlot(xtailres) +NULL + +#' @rdname volcanoPlot +#' @export +setGeneric("volcanoPlot", + function(object, ...) + standardGeneric("volcanoPlot")) + +#' @rdname volcanoPlot +#' +#' @import ggplot2 +#' +#' @export +setMethod("volcanoPlot", signature = "xtail", + function(object){ + ggplot(data = as.data.frame(resultsTable(object)), + aes_string(x = "log2FC_TE_final", + y = "-log10(pvalue_final)"))+ + geom_point(colour="#836FFF", alpha = 0.65, shape = 19L) + + theme_bw() + } +) diff --git a/R/plots.R b/R/plots.R deleted file mode 100755 index 7a6e5c8..0000000 --- a/R/plots.R +++ /dev/null @@ -1,255 +0,0 @@ - -#' Scatterplot of fold changes. -#' -#' A simple function that plots the fold change in mRNA and RPF. -#' Different classes of genes are labeled with different of color. -#' Each dot, representing a particular gene, are color coded by P-value (-log10) -#' of its differential translation across two conditions, which was estimated with xtail. -#' -#' @docType methods -#' @name plotFCs -#' @rdname plotFCs -#' @aliases plotFCs -#' -#' @importFrom scales -#' -#' @param object a xtailResults -#' @ explort - -plotFCs <- function(object, log2FC.cutoff = 1, cex=1, xlim, ylim, ..., cex.lab, cex.axis, cex.main, cex.sub, pch) -{ - list.of.packages <- "scales" - if(list.of.packages %in% installed.packages()[,"Package"]){ - library("scales") - }else{ - message('please install "scales" before run this function') - } - - if(is.null(object$resultsTable)) stop("error, the object must be created by using the xtail function.") - if(!is(object, "xtailResults")) stop("error, the object must be created by using the xtail function.") - - if (log2FC.cutoff <= 0 ) stop("log2FC.cutoff must be larger than 0") - if (!missing(xlim)){ - if (length(xlim) != 2) stop("invalid xlim value, should have two elemtnts") - } - if (!missing(ylim)){ - if (length(ylim) != 2) stop("invalid ylim value, should have two elemtnts") - } - resultsTable <- object$resultsTable - resultsTable <- resultsTable[complete.cases(resultsTable),] - resultsTable <- resultsTable[order(resultsTable$pvalue_final, decreasing = TRUE),] - resultsTable$colorscale <- rescale(-log10(resultsTable$pvalue_final)) - - if (missing(xlim)){ - xmin <- min(resultsTable$mRNA_log2FC,na.rm=T) - 0.1 - xmax <- max(resultsTable$mRNA_log2FC,na.rm=T) + 0.1 - xlim <- c(xmin,xmax) - } - if (missing(ylim)){ - ymin <- min(resultsTable$RPF_log2FC,na.rm=T) - 0.1 - ymax <- max(resultsTable$RPF_log2FC,na.rm=T) + 0.1 - ylim <- c(ymin,ymax) - } - if (missing(cex.lab)) cex.lab <- 1.2 - if (missing(cex.axis)) cex.axis <- 1 - if (missing(cex.main)) cex.main <- 1.2 - if (missing(cex.sub)) cex.sub <- 1 - if (missing(pch)) pch <- 20 - - #mRNA stable, RPF stable. - variable <- which(abs(resultsTable$mRNA_log2FC - resultsTable$RPF_log2FC) <= log2FC.cutoff | - (abs(resultsTable$mRNA_log2FC) < log2FC.cutoff & abs(resultsTable$RPF_log2FC) < log2FC.cutoff)) - plot(resultsTable$mRNA_log2FC[variable],resultsTable$RPF_log2FC[variable],pch=pch,col="gray90", - xlim=xlim,ylim=ylim,xlab = "mRNA log2FC", ylab="RPF log2FC",frame.plot=TRUE,cex=cex, - cex.lab=cex.lab, cex.axis=cex.axis, cex.main=cex.main, cex.sub=cex.sub, ...) - - #mRNA change, RPF stable. - variable <- which( abs(resultsTable$mRNA_log2FC) >= log2FC.cutoff & - abs(resultsTable$RPF_log2FC) < log2FC.cutoff & - abs(resultsTable$mRNA_log2FC - resultsTable$RPF_log2FC) >= log2FC.cutoff) - colornames <- seq_gradient_pal("#BFBBFF","#0D5FFF")(resultsTable$colorscale[variable]) - points(resultsTable$mRNA_log2FC[variable],resultsTable$RPF_log2FC[variable], - pch=pch,col=alpha(colornames,0.8),cex=cex) - leg <- "transcription only" - leg.col <- "#0D5FFF" - - #mRNA stable, RPF change. - variable <- which( abs(resultsTable$mRNA_log2FC) < log2FC.cutoff & - abs(resultsTable$RPF_log2FC) >= log2FC.cutoff & - abs(resultsTable$mRNA_log2FC - resultsTable$RPF_log2FC) >= log2FC.cutoff ) - colornames <- seq_gradient_pal("#FFB69C","#FF2E06")(resultsTable$colorscale[variable]) - points(resultsTable$mRNA_log2FC[variable],resultsTable$RPF_log2FC[variable],pch=pch,col=alpha(colornames,0.8),cex=cex) - leg <- c(leg, "translation only") - leg.col <- c(leg.col, "#FF2E06") - - #mRNA change, RPF change, homodirectional. - variable <- which( sign(resultsTable$mRNA_log2FC) * sign(resultsTable$RPF_log2FC) > 0 & - abs(resultsTable$mRNA_log2FC) >= log2FC.cutoff & - abs(resultsTable$RPF_log2FC) >= log2FC.cutoff & - abs(resultsTable$mRNA_log2FC - resultsTable$RPF_log2FC) >= log2FC.cutoff ) - colornames <- seq_gradient_pal("#D1F7AD","#75E805")(resultsTable$colorscale[variable]) - points(resultsTable$mRNA_log2FC[variable],resultsTable$RPF_log2FC[variable],pch=pch,col=alpha(colornames,0.8),cex=cex) - leg <- c(leg, "homodirectional") - leg.col <- c(leg.col, "#75E805") - - #mRNA change, RPF change, opposite change. - variable <- which( sign(resultsTable$mRNA_log2FC) * sign(resultsTable$RPF_log2FC) < 0 & - abs(resultsTable$mRNA_log2FC) >= log2FC.cutoff & - abs(resultsTable$RPF_log2FC) >= log2FC.cutoff & - abs(resultsTable$mRNA_log2FC - resultsTable$RPF_log2FC) >= log2FC.cutoff ) - colornames <- seq_gradient_pal("#FFF1AF","#FFDE13")(resultsTable$colorscale[variable]) - points(resultsTable$mRNA_log2FC[variable],resultsTable$RPF_log2FC[variable],pch=pch,col=alpha(colornames,0.8),cex=cex) - leg <- c(leg, "opposite change") - leg.col <- c(leg.col, "#FFDE13") - - abline(h= log2FC.cutoff, lty=2, col="gray") - abline(v= log2FC.cutoff, lty=2, col="gray") - abline(h= -log2FC.cutoff, lty=2, col="gray") - abline(v= -log2FC.cutoff, lty=2, col="gray") - - legend("bottomright", legend=leg,pch=pch, col=leg.col, bty="n",cex=cex) -} - - -#' Scatterplot of log2 RPF-to-mRNA ratios. -#' -#' A simple function that plots the log2 RPF-to-mRNA ratios in two conditions. -#' Different classes of genes are labeled with different of color. -#' Each dot, representing a particular gene, are color coded by P-value (-log10) -#' -#' @docType methods -#' @name plotRs -#' @rdname plotRs -#' @aliases plotRs -#' -#' @importFrom scales -#' -#' @param object a xtailResults -#' @ explort - -plotRs <- function(object, log2R.cutoff = 1, cex=1, xlim, ylim, ..., cex.lab, cex.axis, cex.main, cex.sub, pch) -{ - list.of.packages <- "scales" - if(list.of.packages %in% installed.packages()[,"Package"]){ - library("scales") - }else{ - message('please install "scales" before run this function') - } - - if(is.null(object$resultsTable)) stop("error, the object must be created by using the xtail function.") - if(!is(object, "xtailResults")) stop("error, the object must be created by using the xtail function.") - - if (log2R.cutoff <= 0 ) stop("log2R.cutoff must be larger than 0") - if (!missing(xlim)){ - if (length(xlim) != 2) stop("invalid xlim value, should have two elemtnts") - } - if (!missing(ylim)){ - if (length(ylim) != 2) stop("invalid ylim value, should have two elemtnts") - } - resultsTable <- object$resultsTable - resultsTable <- resultsTable[complete.cases(resultsTable),] - resultsTable <- resultsTable[order(resultsTable$pvalue_final, decreasing = TRUE),] - resultsTable$colorscale <- rescale(-log10(resultsTable$pvalue_final)) - - condition1_TE <- paste0(object$condition1,"_log2TE") - condition2_TE <- paste0(object$condition2, "_log2TE") - - if (missing(xlim)){ - xmin <- min(resultsTable[[condition1_TE]],na.rm=T) - 0.1 - xmax <- max(resultsTable[[condition1_TE]],na.rm=T) + 0.1 - xlim <- c(xmin,xmax) - } - if (missing(ylim)){ - ymin <- min(resultsTable[[condition2_TE]],na.rm=T) - 0.1 - ymax <- max(resultsTable[[condition2_TE]],na.rm=T) + 0.1 - ylim <- c(ymin,ymax) - } - if (missing(cex.lab)) cex.lab <- 1.2 - if (missing(cex.axis)) cex.axis <- 1 - if (missing(cex.main)) cex.main <- 1.2 - if (missing(cex.sub)) cex.sub <- 1 - if (missing(pch)) pch <- 20 - - #mRNA stable, RPF stable. - variable <- which(abs(resultsTable[[condition1_TE]] - resultsTable[[condition2_TE]]) <= log2R.cutoff | - (abs(resultsTable[[condition1_TE]]) < log2R.cutoff & abs(resultsTable[[condition2_TE]]) < log2R.cutoff)) - plot(resultsTable[[condition1_TE]][variable],resultsTable[[condition2_TE]][variable],pch=pch,col="gray90", - xlim=xlim,ylim=ylim,xlab = paste0(object$condition1," log2Rs"), ylab=paste0(object$condition2," log2Rs"),frame.plot=TRUE,cex=cex, - cex.lab=cex.lab, cex.axis=cex.axis, cex.main=cex.main, cex.sub=cex.sub, ...) - - #change, stable. - variable <- which( abs(resultsTable[[condition1_TE]]) >= log2R.cutoff & - abs(resultsTable[[condition2_TE]]) < log2R.cutoff & - abs(resultsTable[[condition1_TE]] - resultsTable[[condition2_TE]]) >= log2R.cutoff) - colornames <- seq_gradient_pal("#BFBBFF","#0D5FFF")(resultsTable$colorscale[variable]) - points(resultsTable[[condition1_TE]][variable],resultsTable[[condition2_TE]][variable], - pch=pch,col=alpha(colornames,0.8),cex=cex) - leg <- paste0(object$condition1," only") - leg.col <- "#0D5FFF" - - # stable, change. - variable <- which( abs(resultsTable[[condition1_TE]]) < log2R.cutoff & - abs(resultsTable[[condition2_TE]]) >= log2R.cutoff & - abs(resultsTable[[condition1_TE]] - resultsTable[[condition2_TE]]) >= log2R.cutoff ) - colornames <- seq_gradient_pal("#FFB69C","#FF2E06")(resultsTable$colorscale[variable]) - points(resultsTable[[condition1_TE]][variable],resultsTable[[condition2_TE]][variable],pch=pch,col=alpha(colornames,0.8),cex=cex) - leg <- c(leg, paste0(object$condition2," only")) - leg.col <- c(leg.col, "#FF2E06") - - # both change, homodirectional. - variable <- which( sign(resultsTable[[condition1_TE]]) * sign(resultsTable[[condition2_TE]]) > 0 & - abs(resultsTable[[condition1_TE]]) >= log2R.cutoff & - abs(resultsTable[[condition2_TE]]) >= log2R.cutoff & - abs(resultsTable[[condition1_TE]] - resultsTable[[condition2_TE]]) >= log2R.cutoff ) - colornames <- seq_gradient_pal("#D1F7AD","#75E805")(resultsTable$colorscale[variable]) - points(resultsTable[[condition1_TE]][variable],resultsTable[[condition2_TE]][variable],pch=pch,col=alpha(colornames,0.8),cex=cex) - leg <- c(leg, "homodirectional") - leg.col <- c(leg.col, "#75E805") - - # opposite change. - variable <- which( sign(resultsTable[[condition1_TE]]) * sign(resultsTable[[condition2_TE]]) < 0 & - abs(resultsTable[[condition1_TE]]) >= log2R.cutoff & - abs(resultsTable[[condition2_TE]]) >= log2R.cutoff & - abs(resultsTable[[condition1_TE]] - resultsTable[[condition2_TE]]) >= log2R.cutoff ) - colornames <- seq_gradient_pal("#FFF1AF","#FFDE13")(resultsTable$colorscale[variable]) - points(resultsTable[[condition1_TE]][variable],resultsTable[[condition2_TE]][variable],pch=pch,col=alpha(colornames,0.8),cex=cex) - leg <- c(leg, "opposite change") - leg.col <- c(leg.col, "#FFDE13") - - abline(h= log2R.cutoff, lty=2, col="gray") - abline(v= log2R.cutoff, lty=2, col="gray") - abline(h= -log2R.cutoff, lty=2, col="gray") - abline(v= -log2R.cutoff, lty=2, col="gray") - - legend("bottomright", legend=leg,pch=pch, col=leg.col, bty="n",cex=cex) -} - -#' volcano plot -#' -#' A simple function that plots log2 fold change of TE and -log10 of pvalues -#' The genes are color coded by P-value (-log10) -#' -#' -#' @docType methods -#' @name volcanoPlot -#' @rdname volcanoPlot -#' @aliases volcanoPlot -#' -#' @importFrom ggplot2 -#' -#' @param object a xtailResults -#' @ explort - -volcanoPlot <- function(object){ - list.of.packages <- "ggplot2" - if(list.of.packages %in% installed.packages()[,"Package"]){ - library("ggplot2") - }else{ - message('please install "ggplot2" before run this function') - } - - p <- ggplot(data = object$resultsTable, aes(x=log2FC_TE_final, y=-log10(pvalue_final))) - p <- p + geom_point(size=2.5, colour="#836FFF") - p <- p+ theme_bw() - p -} diff --git a/R/runXtail.R b/R/runXtail.R new file mode 100644 index 0000000..7f7a9e2 --- /dev/null +++ b/R/runXtail.R @@ -0,0 +1,104 @@ +#' xtail analysis wrapper for \code{SummarizedExperiment} objects +#' +#' The \code{\link[=xtail]{xtail}} function can be used directly with +#' \code{\link[SummarizedExperiment:SummarizedExperiment-class]{SummarizedExperiment}} +#' objects. The \code{mrna} and \code{rpf} data must be stored as two separate +#' assays. +#' +#' See \code{\link[=xtail]{xtail}} for more details on the analysis +#' function. +#' +#' @param x a \code{RpfSummarizedExperiment} object +#' +#' @param mrna_assay a scalar character. The name of the \code{assay} containing +#' the \code{mRNA} data. +#' +#' @param rpf_assay a scalar character. The name of the \code{assay} containing +#' the \code{rpf} data. +#' +#' @param ... +#' \itemize{ +#' \item{For \code{runXtail}: additional parameters passed on to +#' \code{\link[=xtail]{xtail}}.} +#' \item{For \code{addXtail}: additional parameters passed on to +#' \code{runXtail} and \code{\link[=xtail]{xtail}}.} +#' } +#' +#' @return A \code{\link[=xtail]{xtail}} object for \code{runXtail} or +#' an object of \code{class(x)}. +#' +#' @name runXtail +#' +#' @examples +#' data(xtaildata) +#' test.mrna <- xtaildata$mrna[1:100,] +#' test.rpf <- xtaildata$rpf[1:100,] +#' condition <- c("control","control","treat","treat") +#' +#' se <- SummarizedExperiment(assays = list(mrna = test.mrna, rpf = test.rpf), +#' colData = DataFrame(condition = condition)) +#' xtail <- runXtail(se, "mrna", "rpf", condition = colData(se)$condition, +#' bins = 1000, threads = 2) +#' xtail +#' +#' se <- addXtail(se, "mrna", "rpf", condition = colData(se)$condition, +#' bins = 1000, threads = 2) +#' rowData(se) +NULL + +#' @rdname runXtail +#' @export +setGeneric("runXtail", signature = c("x"), + function(x, mrna_assay, rpf_assay, ...) standardGeneric("runXtail") +) + +#' @rdname runXtail +#' @importFrom SummarizedExperiment assay +#' @export +setMethod("runXtail", signature = c(x = "SummarizedExperiment"), + function(x, mrna_assay, rpf_assay, ...){ + mrna <- assay(x,mrna_assay) + rpf <- assay(x,rpf_assay) + mrna <- .remove_zero_and_NA_rows(mrna) + rpf <- .remove_zero_and_NA_rows(rpf) + xtail(mrna = mrna, rpf = rpf, ...) + } +) + +.remove_zero_and_NA_rows <- function(mat){ + mat <- mat[!apply(apply(mat,1,is.na),2,all),] + mat <- mat[!apply(apply(mat,1,"==",0),2,all),] + mat +} + +#' @rdname runXtail +#' @export +setGeneric("addXtail", signature = c("x"), + function(x, ...) standardGeneric("addXtail") +) + +#' @rdname runXtail +#' @export +setMethod("addXtail", signature = c(x = "SummarizedExperiment"), + function(x, ...){ + xtail <- runXtail(x, ...) + .add_xtail_results(x, xtail) + } +) + +#' @importFrom SummarizedExperiment rowData rowData<- +#' @importFrom S4Vectors metadata metadata<- +.add_xtail_results <- function(x, xtail){ + # add xtail results + rd <- rowData(x) + table <- resultsTable(xtail) + rd[,colnames(table)] <- NA + rd[rownames(table),colnames(table)] <- table + rowData(x) <- rd + # add xtail metadata + metadata(x) <- c(metadata(x), + list(num = resultsNum(xtail), + conditions = conditions(xtail))) + # + x +} diff --git a/R/wrapper-functions.R b/R/wrapper-functions.R new file mode 100644 index 0000000..dd86cfc --- /dev/null +++ b/R/wrapper-functions.R @@ -0,0 +1,62 @@ +# Fit beta coefficients for negative binomial GLM +# +# This function estimates the coefficients (betas) for negative binomial generalized linear models. +# +# ySEXP n by m matrix of counts +# xSEXP m by k design matrix +# nfSEXP n by m matrix of normalization factors +# alpha_hatSEXP n length vector of the disperion estimates +# contrastSEXP a k length vector for a possible contrast +# beta_matSEXP n by k matrix of the initial estimates for the betas +# lambdaSEXP k length vector of the ridge values +# tolSEXP tolerance for convergence in estimates +# maxitSEXP maximum number of iterations +# useQRSEXP whether to use QR decomposition +# +# Note: at this level the betas are on the natural log scale +.fit_Beta <- function(ySEXP, + xSEXP, + nfSEXP, + alpha_hatSEXP, + contrastSEXP, + beta_matSEXP, + lambdaSEXP, + tolSEXP, + maxitSEXP, + useQRSEXP) { + if ( missing(contrastSEXP) ) { + # contrast is not required, just give 1,0,0,... + contrastSEXP <- c(1,rep(0,ncol(xSEXP)-1)) + } + # test for any NAs in arguments + arg.names <- names(formals(.fit_Beta)) + na.test <- vapply(mget(arg.names), function(x) any(is.na(x)), + logical(1)) + if (any(na.test)) { + stop(paste("in call to fitBeta, the following arguments contain NA:", + paste(arg.names[na.test],collapse=", ")), + call. = FALSE) + } + fitBeta2(ySEXP=ySEXP, xSEXP=xSEXP, nfSEXP=nfSEXP, alpha_hatSEXP=alpha_hatSEXP, + contrastSEXP=contrastSEXP, beta_matSEXP=beta_matSEXP, + lambdaSEXP=lambdaSEXP, tolSEXP=tolSEXP, maxitSEXP=maxitSEXP, + useQRSEXP=useQRSEXP) +} + +.xtail_test_wrapper <- function(X){ + res <- xtail_test(counts1[X,], + counts2[X,], + intercept1[X], + intercept2[X], + log2Ratio1[X], + log2Ratio2[X], + dispersion1[X,], + dispersion2[X,], + sizefactor1, + sizefactor2, + cond1, + cond2, + bins, + ci) + res +} diff --git a/R/xtail-class.R b/R/xtail-class.R new file mode 100755 index 0000000..afadf83 --- /dev/null +++ b/R/xtail-class.R @@ -0,0 +1,343 @@ +#' @rdname xtail +#' @export +setClass("xtail", + contains = "SimpleList") + +################################################################################ +# constructor + +# only to be used internally + +#' @importFrom S4Vectors DataFrame +.combine_results <- function(result_log2FC, result_log2R, condition, baseLevel){ + if(is.null(rownames(result_log2FC)) || is.null(rownames(result_log2R))){ + stop("Something went wrong") + } + # get the intersection of results and get them in the same order + intersect.genes <- intersect(rownames(result_log2FC), + rownames(result_log2R)) + result_log2R <- result_log2R[intersect.genes,] + result_log2FC <- result_log2FC[intersect.genes,] + + #result data frame + condition1_TE <- paste0(baseLevel,"_log2TE") + condition2_TE <- paste0(unique(condition)[2], "_log2TE") + res <- cbind(result_log2FC[,c("log2Ratio1","log2Ratio2","deltaTE","Pval")], + result_log2R[,c("log2Ratio1","log2Ratio2","deltaTE","Pval")]) + colnames(res) <- c("mRNA_log2FC","RPF_log2FC","log2FC_TE_v1","pvalue_v1", + condition1_TE,condition2_TE,"log2FC_TE_v2","pvalue_v2") + res <- DataFrame(res) + res +} + +#' @importFrom stats p.adjust +.adjust_p <- function(res, method){ + res$pvalue.adjust <- p.adjust(res$pvalue_final, method = method) + res +} + +#' @importFrom S4Vectors SimpleList +.new_xtail <- function(result_log2FC, result_log2R, condition, baseLevel, ci, + p.adjust.method){ + # input check + if (length(ci) != 1L || ci<0 || ci > 1 ) { + stop("ci must be a single numeric value between 0 and 1.", + call. = FALSE) + } + if (length(unique(condition))!=2) { + stop("There must be exactly two different conditions.", + call. = FALSE) + } + if (length(baseLevel) != 1L || !(baseLevel %in% condition)){ + stop("baseLevel is not in condition", call. = FALSE) + } + # + res <- .combine_results(result_log2FC, result_log2R, condition, baseLevel) + res$log2FC_TE_final <- 0 + res$pvalue_final <- 0 + res$pvalue.adjust <- 0 + log2FC_determine_num <- 0 + log2R_determine_num <- 0 + # empty results + na_result <- is.na(res$pvalue_v1) | is.na(res$pvalue_v2) + res[na_result,"log2FC_TE_final"] <- NA + res[na_result,"pvalue_final"] <- NA + # fold change better than ratio + fc_result <- res$pvalue_v1 > res$pvalue_v2 + fc_result[is.na(fc_result)] <- FALSE + log2FC_determine_num <- sum(!is.na(fc_result)) + res[fc_result,"log2FC_TE_final"] <- res[fc_result,"log2FC_TE_v1"] + res[fc_result,"pvalue_final"] <- res[fc_result,"pvalue_v1"] + # ratio better than fold change + ratio_result <- res$pvalue_v1 <= res$pvalue_v2 + ratio_result[is.na(ratio_result)] <- FALSE + log2R_determine_num <- sum(!is.na(ratio_result)) + res[ratio_result,"log2FC_TE_final"] <- res[ratio_result,"log2FC_TE_v2"] + res[ratio_result,"pvalue_final"] <- res[ratio_result,"pvalue_v2"] + + # + if(ci > 0){ + CI_string = paste0("CI(",100*ci,"%)") + res[[CI_string]] <- NA + res[fc_result,CI_string] <- paste0("[", + round(result_log2FC[fc_result,"lowci"],2L), + ",", + round(result_log2FC[fc_result,"highci"],2L), + "]") + res[ratio_result,CI_string] <- paste0("[", + round(result_log2R[ratio_result,"lowci"],2L), + ",", + round(result_log2R[ratio_result,"highci"],2L), + "]") + } + res <- .adjust_p(res, method = p.adjust.method) + # construct result object + xtail <- SimpleList(resultsTable = res, + log2FC_determine_num = log2FC_determine_num, + log2R_determine_num = log2R_determine_num, + condition1 = baseLevel, + condition2 = unique(condition[condition != baseLevel]) ) + class(xtail) <- "xtail" + validObject(xtail) + xtail +} + + +################################################################################ +# validity + +XTAIL_OBJ_NAMES <- c("resultsTable","log2FC_determine_num", + "log2R_determine_num","condition1","condition2") + +.valid_xtail <- function(object){ + f <- XTAIL_OBJ_NAMES %in% names(object) + if(!all(f)){ + return(paste0("Missing elements in xtail object: '", + paste(XTAIL_OBJ_NAMES[f],"', '"), + "'")) + } + if(!is(object$resultsTable,"DataFrame")){ + return("results table is not a DataFrame") + } + if(!is.integer(object$log2FC_determine_num) || + length(object$log2FC_determine_num) != 1L){ + return("log2FC_determine_num is not an integer value") + } + if(!is.integer(object$log2R_determine_num)|| + length(object$log2R_determine_num) != 1L){ + return("log2R_determine_num is not an integer value") + } + if(!is.character(object$condition1) || + length(object$condition1) != 1L){ + return("condition1 is not a character value") + } + if(!is.character(object$condition2)|| + length(object$condition2) != 1L){ + return("condition2 is not a character value") + } + return(NULL) +} + +setValidity("xtail", .valid_xtail) + +################################################################################ +# accessors + +#' Results table of xtail results +#' +#' To retrieve the results from the xtail run use one of the accessor functions. +#' +#' +#' @param object a \code{xtail} object +#' +#' @param sort.by the column to sort with. Defaults to \code{NULL} to disable +#' sorting. +#' +#' @param log2FCs \code{TRUE} or \code{FALSE}: Should log2 fold change values be +#' returned? (defaults to \code{TRUE}) +#' +#' @param log2Rs \code{TRUE} or \code{FALSE}: Should log2 ratio values be +#' returned? (defaults to \code{TRUE}) +#' +#' @param ... optional arguments. Currently not used +#' +#' @return a \code{DataFrame} with the results or numeric vectors +#' +#' @name xtail-accessors +#' +#' @aliases resultsTable +#' +#' @examples +#' data(xtailres) +#' resultsTable(xtailres) +#' conditions(xtailres) +#' resultsNum(xtailres) +#' +#' # sorting or results +#' resultsTable(xtailres, sort.by = "pvalue.adjust") +NULL + +#' @rdname xtail-accessors +#' @importFrom BiocGenerics conditions +#' @export +setMethod("conditions", signature = c(object="xtail"), + function(object){ + c(object$condition1,object$condition2) + } +) + +#' @rdname xtail-accessors +#' @export +setGeneric("resultsNum", signature = c("object"), + function(object, ...) + standardGeneric("resultsNum")) + +#' @rdname xtail-accessors +#' @export +setMethod("resultsNum", signature = c("xtail"), + function(object, ...){ + c(numFoldChange = object$log2FC_determine_num, + numRatio = object$log2R_determine_num) + } +) + +#' @rdname xtail-accessors +#' @export +setGeneric("resultsTable", + function(object,...) + standardGeneric("resultsTable")) + +#' @rdname xtail-accessors +#' @export +setMethod("resultsTable", signature = c(object="xtail"), + function(object, + sort.by = NULL, + log2FCs = FALSE, + log2Rs = FALSE, + ...){ + # input check + if(!is.logical(log2FCs) || length(log2FCs) != 1L || is.na(log2FCs)){ + stop("'log2FCs' must be TRUE or FALSE",call. = FALSE) + } + if(!is.logical(log2Rs) || length(log2Rs) != 1L || is.na(log2Rs)){ + stop("'log2Rs' must be TRUE or FALSE",call. = FALSE) + } + # + x <- object$resultsTable + if(!is.null(sort.by)){ + sort.by <- match.arg(sort.by, c("pvalue.adjust", "log2FC_TE_v1", + "log2FC_TE_v2", "log2FC_TE_final", + "pvalue_v1" , "pvalue_v2", + "pvalue_final")) + # sort + #absolute TE fold change. + if(sort.by %in% c("log2FC_TE_v1","log2FC_TE_v2","log2FC_TE_final")){ + tefc <- abs(x[[sort.by]]) + } else { + tefc <- abs(x[["log2FC_TE_final"]]) + } + + #pvalue order + if(sort.by %in% c("pvalue_v1","pvalue_v2","pvalue_final", "pvalue.adjust")) { + pvfc <- object$resultsTable[[sort.by]] + } + + # out + o <- switch(sort.by, + "log2FC_TE_v1" = order(tefc, decreasing = TRUE), + "log2FC_TE_v2" = order(tefc, decreasing = TRUE), + "log2FC_TE_final" = order(tefc, decreasing = TRUE), + "pvalue_v1" = order(pvfc, -tefc), + "pvalue_v2" = order(pvfc, -tefc), + "pvalue_final" = order(pvfc, -tefc), + "pvalue.adjust" = order(pvfc, -tefc) + ) + x <- x[o,] + } + # remove deselected columns + if (!log2Rs){ + cond <- paste0(conditions(object), "_log2TE") + x[[cond[1L]]] <- NULL + x[[cond[1L]]] <- NULL + } + if (!log2FCs){ + x$mRNA_log2FC <- NULL + x$RPF_log2FC <- NULL + } + # + x + } +) + +################################################################################ +# show & summary + +.show_xtail <- function(object, alpha = 0.1){ + table <- resultsTable(object) + nums <- resultsNum(object) + all_num <- nrow(table) + up_num <- sum(table$log2FC_TE_final > 0 & table$pvalue.adjust < alpha, + na.rm = TRUE) + down_num <- sum(table$log2FC_TE_final < 0 & table$pvalue.adjust < alpha, + na.rm = TRUE) + cat("A xtail object:\n") + cat("Number of genes tested:", all_num,"\n") + cat("Number of the log2FC and log2R used in determining the final p-value:\n") + cat(" log2FC:", nums["numFoldChange"],"\n") + cat(" log2R :", nums["numRatio"],"\n") + cat("\n") + cat("Number of result with adjusted pvalue < ", alpha, "\n") + cat(" log2FC_TE > 0 (up) :", up_num, "\n") + cat(" log2FC_TE < 0 (down):", down_num,"\n") +} + +setMethod("show", signature = "xtail", + function(object){ + .show_xtail(object) + } +) + +#' @rdname xtail-accessors +#' +#' @param alpha cut off for summarizing results. Only results with a adjusted +#' p-value lower than \code{alpha} will be reported. +#' +#' @export +summary <- function(object, ...){ + UseMethod("summary") +} +#' @rdname xtail-accessors +#' @export +summary.xtail <- function(object, alpha = 0.1, ...){ + .show_xtail(object, alpha = alpha) +} + +################################################################################ + +#' Write xtail results as table +#' +#' xtail results can be directly written to file using \code{write.xtail} +#' +#' @param object a \code{xtail} object +#' +#' @param ... arguments passed onto \code{\link[=resultsTable]{resultsTable}} +#' or \code{write.table}. Should be named arguments to avoid clashes. +#' +#' @return invisible result from \code{write.table} +#' +#' @name write.xtail +#' +#' @seealso +#' \code{\link[utils:write.table]{write.table}} +#' +#' @importFrom utils write.table +#' +#' @export +#' +#' @examples +#' data(xtailres) +#' write.xtail(xtailres, file = tempfile()) +write.xtail <- function(object, ...){ + x <- resultsTable(object, ...) + write.table(x, ...) +} diff --git a/R/xtail-package.R b/R/xtail-package.R new file mode 100644 index 0000000..01715ed --- /dev/null +++ b/R/xtail-package.R @@ -0,0 +1,32 @@ +#' A tool to quantitatively assess differential translations with ribosome profiling data. +#' +#' By pairwise comparisons of ribosome profiling data, Xtail +#' identifies differentially translated genes across two experimental or +#' physiological conditions. +#' +#' +#' @references Zhengtao Xiao, Qin Zou, Yu Liu, and Xuerui Yang: Genome-wide +#' assessment of differential translations with ribosome profiling data. +#' +#' @docType package +#' @name xtail-package +#' @author Zhengtao xiao +NULL + +#' @import methods +#' @import Rcpp +#' @useDynLib xtail +NULL + +#' xtail example data +#' +#' The \code{xtail} package includes some example data +#' +#' @name xtaildata +#' @format a list of matrices +#' @usage data(xtaildata) +"xtaildata" +#' @rdname xtaildata +#' @format an example results +#' @usage data(xtailres) +"xtailres" diff --git a/R/xtail.R b/R/xtail.R index cd45106..e4b4f40 100755 --- a/R/xtail.R +++ b/R/xtail.R @@ -1,217 +1,414 @@ -#' @useDynLib xtail -xTest <- function(object1, object2,threads,bins,baseLevel, ci){ - intersect.genes <- intersect(rownames(object1), rownames(object2)) - object1 <- object1[intersect.genes,,drop=FALSE] - object2 <- object2[intersect.genes,,drop=FALSE] - counts1 <- counts(object1) - counts1[which(counts1==0)] = 1 - counts2 <- counts(object2) - counts2[which(counts2==0)] = 1 - intercept1 <- mcols(object1)$Intercept - intercept2 <- mcols(object2)$Intercept - dispersion1 <- dispersionMatrix(object1) - dispersion2 <- dispersionMatrix(object2) - resName1 <- resultsNames(object1)[2] - resName2 <- resultsNames(object2)[2] - log2Ratio1 <- mcols(object1)[[resName1]] - log2Ratio2 <- mcols(object2)[[resName2]] - sizefactor1 <- sizeFactors(object1) - sizefactor2 <- sizeFactors(object2) - cond1 <- as.numeric(colData(object1)$condition) - 1 - cond2 <- as.numeric(colData(object2)$condition) - 1 - ## Estimate the probabilities of log2R and log2FC - ## parallel used for speeding up - rowNo <- nrow(counts1) - if (is.na(threads)){ - ## automaticly detect the number of CPU cores. - cluster <- makeCluster(detectCores()) - }else{ - cluster <- makeCluster(threads) - } - clusterExport(cluster, c('counts1','counts2','intercept1','intercept2','log2Ratio1','log2Ratio2','dispersion1','dispersion2','sizefactor1','sizefactor2','cond1','cond2','bins','ci'),envir=environment()) - res <- clusterMap(cluster, xTestWrapper, i = c(1:rowNo)); - stopCluster(cluster) - res <- matrix(unlist(res),ncol=4,byrow=T,dimnames=list(intersect.genes, c("deltaTE","Pval","lowci","highci"))) - res <- data.frame(res) - res$log2Ratio1 <- log2Ratio1 - res$log2Ratio2 <- log2Ratio2 - res -} +#' A tool to quantitatively assess differential translations with ribosome profiling data. +#' +#' By pairwise comparisons of ribosome profiling data, Xtail +#' identifies differentially translated genes across two experimental or +#' physiological conditions. +#' +#' @references Zhengtao Xiao, Qin Zou, Yu Liu, and Xuerui Yang: Genome-wide +#' assessment of differential translations with ribosome profiling data. +#' +#' @param mrna a matrix or data frame of raw mRNA count data whose rows +#' correspond to genes and columns correspond to samples. The column names +#' should be non-empty, and in same order with condition. +#' @param rpf a matrix or data frame of raw RPF count data whose rows correspond +#' to genes and columns correspond to samples.The column names should be +#' non-empty, and in same order with condition. +#' @param condition condition labels corresponding to the order of samples in +#' mrna and rpf. There must be exactly two unique values. +#' @param baseLevel The baseLevel indicates which one of the two conditions will +#' be compared against by the other one. If not specified, \code{Xtail} will +#' return results of comparing the second condition over the first one. +#' @param minMeanCount \code{Xtail} uses the average expression level of each +#' gene, across all samples as filter criterion and it omits all genes with +#' mean counts below minMeanCount. +#' @param ci The level of confindence to get credible intervals of log2 fold +#' change of translational efficiency (TE), for example 0.95. +#' @param normalize Whether normalization should be done (TRUE \\ FALSE). If +#' missing, \code{Xtail} will perform median-of-ratios normlazation by +#' default. +#' @param p.adjust.method The method to use for adjusting multiple comparisons, by +#' default "BH", see \code{?p.adjust} +#' @param threads The number of CPU cores used. By default, all available cores +#' are used. +#' @param bins The number of bins used for calculating the probability density +#' of log2FC or log2R (default is 10000). This paramater will determine +#' accuracy of pvalue. Set it small for a very quick test run. +#' +#' @return a \code{xtail} object +#' +#' @details No missing values are allowed in input data mrna and rpf. +#' +#' Duplicate row names (gene names or gene ids) are not allowed. +#' +#' \code{Xtail} takes in raw read counts of RPF and mRNA, and performs +#' median-of-ratios normalization. Alternatively, users can provide normalized +#' read counts and skip the built-in normal by setting "normalize" to FALSE. +#' +#' The step of estimation of the probability distributions, for log2FC or +#' log2R, will execute slowly in the current implementation, but can be +#' speeded up by running on multiple cores using the parallel library. By +#' default, the "detectCores" function in parallel library is used to +#' determine the number of CPU cores in the machine on which R is running. To +#' adjust the number of cores used, use "threads" argument to assign. +#' +#' @name xtail +#' +#' @author Zhengtao xiao +#' +#' @examples +#' #load the data +#' data(xtaildata) +#' # Get the mrna count data and rpf count data. For the example only the first +#' # 100 are used +#' test.mrna <- xtaildata$mrna[1:100,] +#' test.rpf <- xtaildata$rpf[1:100,] +#' +#' #Assign condition labels to samples. +#' condition <- c("control","control","treat","treat") +#' +#' #run xtail +#' test.results <- xtail(test.mrna,test.rpf,condition, threads = 2) +#' test.results +NULL +XTAIL_DISPERSION_ASSAY <- "dispersion" -xTestWrapper <- function(i){ - res <- xtail_test(counts1[i,],counts2[i,],intercept1[i],intercept2[i],log2Ratio1[i],log2Ratio2[i],dispersion1[i,],dispersion2[i,],sizefactor1,sizefactor2,cond1,cond2,bins,ci) -} +.norm_mrna_rpf <- function(mrna, rpf, minMeanCount){ + # + keep.genes <- rowMeans(mrna) >= minMeanCount + mrna.keep <- rownames(mrna)[keep.genes] + keep.genes <- rowMeans(rpf) >= minMeanCount + rpf.keep <- rownames(rpf)[keep.genes] + ## merge genes in mrna and rpf + keep.genes <- intersect(mrna.keep,rpf.keep) + mrna <- mrna[keep.genes,,drop=FALSE] + rpf <- rpf[keep.genes,,drop=FALSE] -estimateFun <- function(countData, condition, baseLevel, libsize, dispers){ - ## using the DESeqDataSet to store data - colData <- data.frame(row.names = colnames(countData), condition=condition) - dataSet <- DESeqDataSetFromMatrix(countData = countData, colData = colData, design = ~condition) - colData(dataSet)$condition <- relevel(colData(dataSet)$condition, baseLevel) - - # normalization - if (missing(libsize)){ - dataSet <- suppressMessages(estimateSizeFactors(dataSet)) - }else{ - sizeFactors(dataSet) <- libsize - } + ## if the colnames of mrna and rpf are same, add characters to distinguish them. + if (sum(colnames(mrna) == colnames(rpf)) >= 1){ + colnames(mrna) = paste0("mrna_",colnames(mrna)) + colnames(rpf) = paste0("rpf_",colnames(rpf)) + } + list(mrna = mrna, rpf = rpf) +} - # estimateDispersion - if (missing(dispers)) - { - if (ncol(countData) == 2){ - object = dataSet #creat a copy of a dataSet for estimating dispersion - design(object) = formula(~1) # take two conditon samples as replicates for estimating dispersion - object = estimateDispersionsGeneEst(object) - #fit - dispFitFun <- xtailDispersionFit(mcols(object)$baseMean, mcols(object)$dispGeneEst) - #attr(dispFitFun, "fitType") = "bin quantile" - fittedDisps <- dispFitFun(mcols(object)$baseMean) - dispersionMatrix(dataSet) = matrix(rep(fittedDisps, ncol(dataSet)), ncol=ncol(dataSet), byrow=FALSE) - }else{ - dataSet <- suppressMessages(estimateDispersions(dataSet)) - dispersionMatrix(dataSet) <- matrix(rep(dispersions(dataSet), ncol(dataSet)),ncol=ncol(dataSet), byrow=FALSE) - } - }else{ - dispersionMatrix(dataSet) <- dispers - } +#' @importFrom stats median +.estimate_size_factors_for_matrix <- function(counts, + locfunc = stats::median){ + loggeomeans <- rowMeans(log(counts)) + if (all(is.infinite(loggeomeans))) { + stop("every gene contains at least one zero, cannot compute log ", + "geometric means", + call. = FALSE) + } + sf <- apply(counts, 2, function(cnts) { + exp(locfunc((log(cnts) - loggeomeans)[is.finite(loggeomeans) & cnts > 0])) + }) + sf +} - # estimate log2FC or log2R - dataSet <- suppressMessages(estimateMLEForBeta(dataSet, modelMatrixType="standard")) - dataSet +.get_size_factors <- function(mrna, rpf, normalize){ + #normalize together + if (normalize){ + message("Calculating the library size factors") + pool_sizeFactor <- .estimate_size_factors_for_matrix(cbind(mrna,rpf)) + mrna_sizeFactor <- pool_sizeFactor[seq_len(ncol(mrna))] + rpf_sizeFactor <- pool_sizeFactor[(ncol(mrna)+1):(ncol(mrna)+ncol(rpf))] + } else { + mrna_sizeFactor <- rep(1, ncol(mrna)) + rpf_sizeFactor <- rep(1,ncol(rpf)) + } + list(mrna_sizeFactor = mrna_sizeFactor, rpf_sizeFactor = rpf_sizeFactor) } + +#' @rdname xtail +#' @importFrom SummarizedExperiment assay #' @export -xtail <- function(mrna, rpf, condition, baseLevel = NA, minMeanCount = 1, normalize = TRUE, p.adjust.method ="BH", threads=NA,bins=10000,ci = 0) -{ +xtail <- function(mrna, + rpf, + condition, + baseLevel = NA, + minMeanCount = 10, + normalize = TRUE, + p.adjust.method ="BH", + threads = NA, + bins = 10000L, + ci = 0){ + # input checks + ## there must be exactly two different conditon + if (length(unique(condition))!=2) { + stop("There must be exactly two different conditions.", + call. = FALSE) + } + # normalize must be TRUE or FALSE + if(!is.logical(normalize) || length(normalize) != 1L || is.na(normalize)){ + stop("'normalize' must be TRUE or FALSE.", call. = FALSE) + } ## default baseLevel is the first coniditon - if (is.na(baseLevel)) {baseLevel <- condition[1]} + if (is.na(baseLevel)) { + baseLevel <- condition[1] + } ## if the baseLevel is in conditon - if (!is.element(baseLevel, condition)) stop("baseLevel is not in condition") + if (length(baseLevel) != 1L || !(baseLevel %in% condition)){ + stop("baseLevel is not in condition", call. = FALSE) + } ## if the colnames of mrna and rpf are matched - if (ncol(mrna) != ncol(rpf)) stop("the mrna and rpf must have same number of columns") + if(is.null(rownames(mrna)) || is.null(rownames(rpf))){ + stop("rownames of 'mrna' and 'rpf' must be set.", + call. = FALSE) + } + if (ncol(mrna) != ncol(rpf)) { + stop("'mrna' and 'rpf' must have same number of columns.", + call. = FALSE) + } ## if the colnames and condition are match - if(length(condition)!=ncol(mrna)) stop("condition must have same length as the number of columns of rna") - ## ther must be exactly two different conditon - if (length(unique(condition))!=2) stop("There must be exactly two different conditions") + if(length(condition) != ncol(mrna)) { + stop("condition must have same length as the number of columns of rna.", + call. = FALSE) + } ## check confidence interval - if (ci<0 ) stop("ci must be non-negative") + if (length(ci) != 1L || ci<0 || ci > 1 ) { + stop("ci must be a single numeric value between 0 and 1.", + call. = FALSE) + } ## check pvalue.adjust method supported.padj <- c("holm", "hochberg", "hommel", "bonferroni", "BH", "BY","fdr") - if (!is.element(p.adjust.method, supported.padj)) stop(paste0("The adjustment methods must be one of ",paste(supported.padj,collapse=", "))) - - ## filter genes with low count number - if (minMeanCount<1) stop("minMeanCount needs to be at least 1") - keep.genes <- rowMeans(mrna) >= minMeanCount - mrna.keep <- rownames(mrna)[keep.genes] - keep.genes <- rowMeans(rpf) >= minMeanCount - rpf.keep <- rownames(rpf)[keep.genes] - ## merge genes in mrna and rpf - keep.genes <- intersect(mrna.keep,rpf.keep) - mrna <- mrna[keep.genes,,drop=FALSE] - rpf <- rpf[keep.genes,,drop=FALSE] - - ## if the colnames of mrna and rpf are same, add characters to distinguish them. - if (sum(colnames(mrna) == colnames(rpf)) >= 1){ - colnames(mrna) = paste0("mrna_",colnames(mrna)) - colnames(rpf) = paste0("rpf_",colnames(rpf)) + if (length(p.adjust.method) != 1L || !(p.adjust.method %in% supported.padj)) { + stop(paste0("The adjustment methods must be one of '", + paste(supported.padj,collapse="', '")), + "'.", + call. = FALSE) } - #normalize together - if (normalize){ - message("Calculating the library size factors") - pool_sizeFactor <- estimateSizeFactorsForMatrix(cbind(mrna,rpf)) - mrna_sizeFactor <- pool_sizeFactor[1:ncol(mrna)] - rpf_sizeFactor <- pool_sizeFactor[(ncol(mrna)+1):(ncol(mrna)+ncol(rpf))] - }else{ - mrna <- round(mrna) - rpf <- round(rpf) - mrna_sizeFactor <- rep(1, ncol(mrna)) - rpf_sizeFactor <- rep(1,ncol(rpf)) + if (length(minMeanCount) != 1L || minMeanCount < 1){ + stop("minMeanCount must be single numeric value and be larger than 1.", + call. = FALSE) } - + # norm the input matrices and + mrna_rpf_normed <- .norm_mrna_rpf(mrna, rpf, minMeanCount) + mrna <- mrna_rpf_normed$mrna + rpf <- mrna_rpf_normed$rpf + size_factors <- .get_size_factors(mrna, rpf, normalize) + mrna_sizeFactor <- size_factors$mrna_sizeFactor + rpf_sizeFactor <- size_factors$rpf_sizeFactor + if(!normalize){ + mrna <- round(mrna) + rpf <- round(rpf) + } + ############################################################################ ## 1. Estimate the difference of log2FC between mRNA and RPF #If no replicate, we assume no more than one-third of genes' expression changed. message ("1. Estimate the log2 fold change in mrna") - mrna_object = estimateFun(mrna,condition,baseLevel,mrna_sizeFactor) + mrna_object = .estimate(mrna, + condition, + baseLevel, + mrna_sizeFactor) message ("2. Estimate the log2 fold change in rpf") - rpf_object = estimateFun(rpf,condition,baseLevel,rpf_sizeFactor) + rpf_object = .estimate(rpf, + condition, + baseLevel, + rpf_sizeFactor) message ("3. Estimate the difference between two log2 fold changes") - result_log2FC = xTest(mrna_object,rpf_object,threads,bins,baseLevel,ci) - + result_log2FC = .xtail_test(mrna_object, + rpf_object, + threads, + bins, + baseLevel, + ci) + ############################################################################ ### 2. Estimate the difference of log2R between control and treatment - condition1_mrna <- mrna[,condition==baseLevel,drop=FALSE] - condition1_rpf <- rpf[,condition==baseLevel,drop=FALSE] - condition2_mrna <- mrna[,condition!=baseLevel,drop=FALSE] - condition2_rpf <- rpf[,condition!=baseLevel,drop=FALSE] + cond_base <- condition == baseLevel + condition1_mrna <- mrna[,cond_base,drop=FALSE] + condition1_rpf <- rpf[,cond_base,drop=FALSE] + condition2_mrna <- mrna[,!cond_base,drop=FALSE] + condition2_rpf <- rpf[,!cond_base,drop=FALSE] condition1_counts <- cbind(condition1_mrna,condition1_rpf) condition2_counts <- cbind(condition2_mrna,condition2_rpf) - condition1_sizeFactor <- c(mrna_sizeFactor[condition==baseLevel],rpf_sizeFactor[condition==baseLevel]) - condition2_sizeFactor <- c(mrna_sizeFactor[condition!=baseLevel],rpf_sizeFactor[condition!=baseLevel]) + condition1_sizeFactor <- c(mrna_sizeFactor[cond_base],rpf_sizeFactor[cond_base]) + condition2_sizeFactor <- c(mrna_sizeFactor[!cond_base],rpf_sizeFactor[!cond_base]) - condition1_disper <- cbind(dispersionMatrix(mrna_object)[,condition==baseLevel,drop=FALSE], dispersionMatrix(rpf_object)[,condition==baseLevel,drop=FALSE]) - condition2_disper <- cbind(dispersionMatrix(mrna_object)[,condition!=baseLevel,drop=FALSE], dispersionMatrix(rpf_object)[,condition!=baseLevel,drop=FALSE]) - condition1 <- c(paste0(condition[condition == baseLevel],"_mRNA"), paste0(condition[condition == baseLevel],"_rpf")) + condition1_disper <- cbind(assay(mrna_object,XTAIL_DISPERSION_ASSAY)[,cond_base,drop=FALSE], + assay(rpf_object,XTAIL_DISPERSION_ASSAY)[,cond_base,drop=FALSE]) + condition2_disper <- cbind(assay(mrna_object,XTAIL_DISPERSION_ASSAY)[,!cond_base,drop=FALSE], + assay(rpf_object,XTAIL_DISPERSION_ASSAY)[,!cond_base,drop=FALSE]) + condition1 <- c(paste0(condition[cond_base],"_mRNA"), + paste0(condition[cond_base],"_rpf")) baseLevel1 <- paste0(baseLevel,"_mRNA") - condition2 <- c(paste0(condition[condition != baseLevel],"_mRNA"), paste0(condition[condition != baseLevel],"_rpf")) - baseLevel2 <- paste0(unique(condition)[2],"_mRNA") + condition2 <- c(paste0(condition[!cond_base],"_mRNA"), + paste0(condition[!cond_base],"_rpf")) + baseLevel2 <- paste0(unique(condition[!cond_base]),"_mRNA") # message ("4. Estimate the log2 ratio in first condition") - condition1_object <- estimateFun(condition1_counts,condition1,baseLevel1,condition1_sizeFactor,condition1_disper) + condition1_object <- .estimate(condition1_counts, + condition1,baseLevel1, + condition1_sizeFactor, + condition1_disper) message ("5. Estimate the log2 ratio in second condition") - condition2_object <- estimateFun(condition2_counts,condition2,baseLevel2,condition2_sizeFactor,condition2_disper) + condition2_object <- .estimate(condition2_counts, + condition2,baseLevel2, + condition2_sizeFactor, + condition2_disper) message ("6. Estimate the difference between two log2 ratios") - result_log2R = xTest(condition1_object,condition2_object,threads,bins,baseLevel,ci) - + result_log2R = .xtail_test(condition1_object, + condition2_object, + threads, + bins, + baseLevel, + ci) + ############################################################################ ## 3. combine the log2FC and log2R results and report + xtail <- .new_xtail(result_log2FC, result_log2R, condition, baseLevel, ci, + p.adjust.method) + nums <- resultsNum(xtail) + message("Number of the log2FC and log2R used in determining the final p-value") + message(paste0(" log2FC: ", nums["numFoldChange"])) + message(paste0(" log2R: ", nums["numRatio"])) + xtail +} - intersect.genes <- intersect(rownames(result_log2FC), rownames(result_log2R)) - result_log2R <- result_log2R[intersect.genes,] - result_log2FC <- result_log2FC[intersect.genes,] - - #result data frame - condition1_TE <- paste0(baseLevel,"_log2TE") - condition2_TE <- paste0(unique(condition)[2], "_log2TE") - final_result <- cbind(result_log2FC[,c("log2Ratio1","log2Ratio2","deltaTE","Pval")],result_log2R[,c("log2Ratio1","log2Ratio2","deltaTE","Pval")]) - colnames(final_result) <- c("mRNA_log2FC","RPF_log2FC","log2FC_TE_v1","pvalue_v1",condition1_TE,condition2_TE,"log2FC_TE_v2","pvalue_v2") - final_result <- as.data.frame(final_result) - final_result$log2FC_TE_final <- 0 - final_result$pvalue_final <- 0 - final_result$pvalue.adjust <- 0 - if (ci>0){ - resultCI <- NA - } - log2FC_determine_num <- 0 - log2R_determine_num <- 0 - for (i in 1:nrow(final_result)){ - if (is.na(final_result[i,"pvalue_v1"]) || is.na(final_result[i,"pvalue_v2"])){ - final_result$log2FC_TE_final[i] <- NA - final_result$pvalue_final[i] <- NA - if (ci>0) resultCI[i] <- NA - }else if(final_result[i,"pvalue_v1"] > final_result[i,"pvalue_v2"]){ - final_result$log2FC_TE_final[i] <- final_result[i,"log2FC_TE_v1"] - final_result$pvalue_final[i] <- final_result[i,"pvalue_v1"] - if (ci>0) resultCI[i] <- paste0("[",round(result_log2FC[i,"lowci"],2),",",round(result_log2FC[i,"highci"],2),"]") - log2FC_determine_num <- log2FC_determine_num + 1 - }else{ - final_result$log2FC_TE_final[i] <- final_result[i,"log2FC_TE_v2"] - final_result$pvalue_final[i] <- final_result[i,"pvalue_v2"] - if (ci>0) resultCI[i] <- paste0("[",round(result_log2R[i,"lowci"],2),",",round(result_log2R[i,"highci"],2),"]") - log2R_determine_num <- log2R_determine_num + 1 - } - } +#' @importFrom locfit locfit +#' @importFrom stats predict +.get_xtail_dispersion_fit_function <- function(rawmeans, + rawdisps, + quantilePercent = 0.35, + binnum = 50, + minDisp = 1e-8){ + useForFit <- rawdisps > 100 * minDisp + if(sum(useForFit) == 0){ + stop("all gene-wise dispersion estimates are within 2 orders of ", + "magnitude from the minimum value, and so the gene-wise ", + "estimates as final estimates.") + } + usedMeans <- rawmeans[useForFit] + usedDisps <- rawdisps[useForFit] + sortMeans <- usedMeans[order(usedMeans)] + sortDisps <- usedDisps[order(usedMeans)] + genenum <- length(sortMeans) + quantile_means <- c() + quantile_disps <- c() + for(i in seq(1,genenum,binnum)){ + num = min(binnum,genenum-i+1) + if(num<(binnum)){ + next + } + curbin_means <- sortMeans[i:(i+num-1)] + curbin_disps <- sortDisps[i:(i+num-1)] - final_result$pvalue.adjust = p.adjust(final_result$pvalue_final,method=p.adjust.method) - if (ci>0){ - CI_string = paste0("CI(",100*ci,"%)") - final_result[[CI_string]] = resultCI - } - message("Number of the log2FC and log2R used in determining the final p-value") - message(paste0(" log2FC: ", log2FC_determine_num)) - message(paste0(" log2R: ", log2R_determine_num)) - xtail_results <- list(resultsTable = final_result, log2FC_determine_num = log2FC_determine_num, - log2R_determine_num=log2R_determine_num,condition1=baseLevel, - condition2=unique(condition)[2] ) - class(xtail_results) <- c("xtailResults","list") - xtail_results + m <- curbin_means[order(curbin_disps)][seq_len(floor(num*quantilePercent))] + d <- curbin_disps[order(curbin_disps)][seq_len(floor(num*quantilePercent))] + quantile_means <- c(quantile_means, m) + quantile_disps <- c(quantile_disps, d) + } + dat <- data.frame(logDisps = log(quantile_disps), logMeans=log(quantile_means)) + fit <- locfit(logDisps~logMeans, data=dat[quantile_disps>=minDisp*10,,drop=FALSE]) + dispFit <- function(rawmeans){ + exp(predict(fit,data.frame(logMeans=log(rawmeans)))) + } + dispFit +} + +#' @import DESeq2 +#' @importFrom stats relevel formula +#' @importFrom S4Vectors mcols +#' @importFrom SummarizedExperiment assay assay<- +.estimate <- function(countData, condition, baseLevel, libsize, dispers = NULL){ + ## using the DESeqDataSet to store data + colData <- data.frame(row.names = colnames(countData), + condition = relevel(factor(condition), baseLevel)) + dataSet <- DESeqDataSetFromMatrix(countData = countData, + colData = colData, + design = ~ condition) + # colData(dataSet)$condition <- relevel(colData(dataSet)$condition, baseLevel) + + # normalization + if (missing(libsize)){ + dataSet <- suppressMessages(estimateSizeFactors(dataSet)) + } else { + sizeFactors(dataSet) <- libsize + } + + # estimateDispersion + if (is.null(dispers)){ + if (ncol(countData) == 2){ + object <- dataSet #creat a copy of a dataSet for estimating dispersion + design(object) <- formula(~1) # take two conditon samples as replicates for estimating dispersion + object <- estimateDispersionsGeneEst(object) + #fit + dispFitFun <- .get_xtail_dispersion_fit_function( + mcols(object)$baseMean, + mcols(object)$dispGeneEst) + #attr(dispFitFun, "fitType") = "bin quantile" + fittedDisps <- dispFitFun(mcols(object)$baseMean) + dispers <- matrix(rep(fittedDisps, ncol(dataSet)), + ncol = ncol(dataSet), + byrow = FALSE) + } else { + dataSet <- suppressMessages(estimateDispersions(dataSet)) + dispers <- matrix(rep(dispersions(dataSet), ncol(dataSet)), + ncol = ncol(dataSet), + byrow = FALSE) + } + } + assay(dataSet, XTAIL_DISPERSION_ASSAY, withDimnames = FALSE) <- dispers + # estimate log2FC or log2R + dataSet <- suppressMessages( + .estimate_MLE_for_Beta(dataSet, modelMatrixType = "standard")) + dataSet +} + + +#' @importFrom parallel makeCluster clusterExport detectCores parLapply +#' stopCluster +#' @importFrom S4Vectors mcols +#' @importFrom SummarizedExperiment assay colData +.xtail_test <- function(object1, object2,threads,bins,baseLevel, ci){ + intersect.genes <- intersect(rownames(object1), rownames(object2)) + object1 <- object1[intersect.genes,,drop=FALSE] + object2 <- object2[intersect.genes,,drop=FALSE] + counts1 <- counts(object1) + counts1[which(counts1==0)] = 1 + counts2 <- counts(object2) + counts2[which(counts2==0)] = 1 + intercept1 <- mcols(object1)$Intercept + intercept2 <- mcols(object2)$Intercept + dispersion1 <- assay(object1,XTAIL_DISPERSION_ASSAY) + dispersion2 <- assay(object2,XTAIL_DISPERSION_ASSAY) + resName1 <- resultsNames(object1)[2] + resName2 <- resultsNames(object2)[2] + log2Ratio1 <- mcols(object1)[[resName1]] + log2Ratio2 <- mcols(object2)[[resName2]] + sizefactor1 <- sizeFactors(object1) + sizefactor2 <- sizeFactors(object2) + cond1 <- as.numeric(colData(object1)$condition) - 1L + cond2 <- as.numeric(colData(object2)$condition) - 1L + ## Estimate the probabilities of log2R and log2FC + ## parallel used for speeding up + rowNo <- nrow(counts1) + # + if(is.na(threads)){ + ## automaticly detect the number of CPU cores. + cluster <- makeCluster(detectCores()) + } else { + cluster <- makeCluster(threads) + } + on.exit(stopCluster(cluster)) + clusterExport(cl = cluster, + varlist = c("counts1","counts2","intercept1","intercept2", + "log2Ratio1","log2Ratio2","dispersion1", + "dispersion2","sizefactor1","sizefactor2","cond1", + "cond2","bins","ci"), + envir = environment()) + res <- parLapply(cluster, + X = seq_len(rowNo), + fun = .xtail_test_wrapper) + res <- matrix(unlist(res), + ncol = 4L, + byrow = TRUE, + dimnames = list(intersect.genes, + c("deltaTE","Pval","lowci","highci"))) + res <- data.frame(res) + res$log2Ratio1 <- log2Ratio1 + res$log2Ratio2 <- log2Ratio2 + res } diff --git a/README.md b/README.md index 7a49da3..78d4800 100644 --- a/README.md +++ b/README.md @@ -5,16 +5,19 @@ Genome-wide assessment of differential translations with ribosome profiling data # VERSION -1.1.5 +1.2.0 # Authors -Zhengtao Xiao, Qin Zou, Yu Liu and Xuerui Yang +Zhengtao Xiao # DESCRIPTION -The purpose of this package is to identify the genes undergoing differential translation across two conditions with ribosome profiling data. - +The xtail package is designed to identify genes exhibiting differential translation +between two experimental conditions by simultaneously analyzing changes in RNA-seq +and ribo-seq data. The estimation of changes in read counts are performed +using the DESeq2 package. + # REQUIREMENTS * R >= 3.2 @@ -24,7 +27,8 @@ The purpose of this package is to identify the genes undergoing differential tra # INSTALLATION -To install Xtail, please refer to INSTALL file in this directory. +Details of the installation process can be found in the INSTALL file located in the root directory of the package. +Additionally, a Docker image of xtail is available; please refer to this [page](https://hub.docker.com/r/yanglab/xtail) for more information. # CONTENTS @@ -40,10 +44,4 @@ or type: vignette("xtail") at the R-prompt. Xtail is licensed under the GPL version 3 or any later version -For more information please contact xzt13@mails.tsinghua.edu.cn and THUhry12@163.com - - - - - - +For more information please contact zhengtao.xiao[at]xjtu.edu.cn diff --git a/data/.directory b/data/.directory deleted file mode 100755 index 4e57b5e..0000000 --- a/data/.directory +++ /dev/null @@ -1,6 +0,0 @@ -[Dolphin] -Timestamp=2015,10,15,10,45,13 -Version=3 - -[Settings] -HiddenFilesShown=true diff --git a/data/xtaildata.rda b/data/xtaildata.rda index 7feb094..4472878 100755 Binary files a/data/xtaildata.rda and b/data/xtaildata.rda differ diff --git a/data/xtailres.rda b/data/xtailres.rda new file mode 100644 index 0000000..f30d4ef Binary files /dev/null and b/data/xtailres.rda differ diff --git a/inst/doc/xtail.Rnw b/inst/doc/xtail.Rnw deleted file mode 100755 index 2980936..0000000 --- a/inst/doc/xtail.Rnw +++ /dev/null @@ -1,222 +0,0 @@ -%\VignetteIndexEntry{xtail} -%\VignetteDepends{xtail} -%\VignetteKeywords{xtail} -%\VignettePackage{xtail} -%\VignetteEngine{knitr::knitr} - -% To compile this document -% library('knitr');rm(list=ls());knit('xtail.Rnw') - -\documentclass[12pt]{article} - - -<>= -library("knitr") -opts_chunk$set( - tidy=FALSE, - fig.show="hide", - cache=TRUE, - message=FALSE) -@ - -\title{Genome-wide assessment of differential translations \\ with ribosome profiling data \\-- the xtail package} -\author{Zhengtao Xiao$^{1-3}$, Qin Zou$^{1,3}$, Yu Liu$^{1-3}$, and Xuerui Yang$^{1-3}$ -\\[1em] \small{$^{1}$MOE Key Laboratory of Bioinformatics, } -\\ \small{$^{2}$Tsinghua-Peking Joint Center for Life Sciences,} -\\ \small{$^{3}$School of Life Sciences, Tsinghua University, Beijing 100084, China.}} - -<<>= -BiocStyle::latex() -@ - -\begin{document} - -<>= -library(knitr) -opts_chunk$set( -concordance=TRUE -) -@ - -\maketitle -\vspace{15em} - -\begin{center} - \begin{tabular}{ | l | } - \hline - \textbf{xtail version:} \Sexpr{packageVersion("xtail")} \\ - \\ - If you use xtail in published research, please cite: \\ - Z. Xiao, Q. Zou, Y. Liu, X. Yang: \textbf{Genome-wide assessment} \\ - \textbf{of differential translations with ribosome profiling data}. \\ - \emph{Nat Commun} 2016, \textbf{7}:11194. \\ - \url{http://www.ncbi.nlm.nih.gov/pubmed/27041671} \\ - \hline - \end{tabular} -\end{center} - -\newpage -\section{Introduction} - -This package, Xtail, is for identification of genes undergoing differential translation across two conditions with ribosome profiling data. Xtail is based on a simple assumption that if a gene is subjected to translational dyresgulation under certain exprimental or physiological condition, the change of its RPF abundance should be discoordinated with that of mRNA expression. Specifically, \verb'Xtail' consists of three major steps: (1) modeling of ribosome profiling data using negative binomial distribution (NB), (2) estabilishment of probability distributions for fold changes of mRNA or RPF (or RPF-to-mRNA ratios), and (3) evaluation of statistical significance and magnitude of differential translations. The differential translation of each gene is evaluated by two pipelines: -in the first one, \verb'Xtail' calculated the posterior probabilities for a range of mRNA or RPF fold changes, and eventually estabilished their probability distributions. These two distributions, represented as probability vectors, were then used to estabilish a joint probability distribution matrix, from which a new probability distribution were generated for differential translation. The P-values, point estimates and credible intervals of differential tranlsations were then calculated based on these results. In the other parallel pipline, \verb'Xtail' established probability distributions for RPF-to-mRNA ratios in two conditions and derived another distribution for differential translation. The more conserved set of results from these two parallel piplines was used as the final result. With this strategy, \verb'Xtail' performs quantification of differential translation for each gene, i.e., the extent to which a gene's translational rate is not coordinated with the change of the mRNA expression. - -By default, \verb'Xtail' adapts the strategy of DESeq2 [1] to normalize read counts of mRNA and RPF in all samples, and fits NB distributions with dispersions $\alpha$ and $\mu$. - -This guide provides step-by-step instructions on how to load data, how to excute the package and how to interpret output. - -\section{Data Preparation} - -The \verb'Xtail' package uses read counts of RPF and mRNA, in the form of rectangular table of values. The rows and columns of the table represent the genes and samples, respectively. Each cell in the \textsl{g-th} row and the \textsl{i-th} columns is the count number of reads mapped to gene \textsl{g} in sample \textsl{i}. - -Xtail takes in raw read counts of RPF and mRNA, and performs median-of-ratios normalization by default. This normalization method is also recommend by Reddy R. [2]. Alternatively, users can provide normalized read counts and skip the built-in normalization in Xtail. - -In this vignette, we select a published ribosome profiling dataset from human prostate cancer cell PC3 after mTOR signaling inhibition with PP242 [3]. This dataset consists of mRNA and RPF data for 11391 genes in two replicates from each of the two conditions("treatment" vs. "control"). - -\section{An Example} - -Here we run \verb'Xtail' with the ribosome profiling data described above. First we load the library and data. - -<>= -library(xtail) -data(xtaildata) -@ - - -Next we can view the first five lines of the mRNA (\verb'mrna') and RPF (\verb'rpf') elements of \verb'xtaildata'. - -<<>>= -mrna <- xtaildata$mrna -rpf <- xtaildata$rpf -head(mrna,5) -head(rpf,5) -@ - - -We assign condition labels to the columns of the mRNA and RPF data. - -<<>>= -condition <- c("control","control","treat","treat") -@ - - -Next, we run the main function, \Rfunction{xtail()}. By default, the second condition (here is "treat") would be compared against the first condition (here is "control"). Those genes with the minimum average expression of mRNA counts and RPF counts among all samples larger than 1 are used (can be changed by setting \verb'minMeanCount'). All the available CPU cores are used for running program. The argument \verb`"bins"` is the number of bins used for calculating the probability densities of log2FC and log2R. This paramater will determine accuracy of the final pvalue. Here, in order to keep the run-time of this vignette short, we will set \verb'bins' to "1000". Detailed description of the arguments of the \texttt{xtail} function can be found by typing \texttt{?xtail} or \texttt{help(xtail)} at the \textbf{R} prompt. - -<<>>= -test.results <- xtail(mrna,rpf,condition,bins=1000) -@ - -We can summarize some basic information of xtail results using the summary function (type \texttt{?summary} for further information). - -<>= -summary(test.results) -@ - -Now we can extract a results table using the function \Rfunction{resultsTable}, and examine the first five lines of the results table. - -<>= -test.tab <- resultsTable(test.results) -head(test.tab,5) -@ - -The results of fist pipline are named with suffix "\_v1", which are generated by comparing mRNA and RPF log2 fold changes: The element \verb'log2FC_TE_v1' represents the log2 fold change of TE; The \verb"pvalue_v1" represent statistical significance. The sencond pipline are named with suffix "\_v2", which are derived by comparing log2 ratios between two conditions: \verb'log2FC_TE_v2', and \verb'pvalue_v2' are log2 ratio of TE, and pvalues. Finally, the more conserved results (with larger-Pvalue) was select as the final assessment of differential translation, which are named with suffix "\_final". The \verb'pvalue.adjust' is the estimated false discovery rate corresponding to the \verb'pvalue_final'. - -Users can also get the log2 fold changes of mRNA and RPF, or the log2 ratios of two conditions by setting "log2FCs" or "log2Rs" as "TRUE" in resultsTable. And the results table can be sorted by assigning the "sort.by". Detailed description of the \texttt{resultsTable} function can be found by typing \texttt{?resultsTable}. - -Finally, the plain-text file of the results can be exported using the functions \textsl{write.csv} or \textsl{write.table}. - -<>= -write.table(test.tab,"test_results.txt",quote=F,sep="\t") -@ - -We also provide a very simple function, \texttt{write.xtail} (using the write.table function), to export the \verb'xtail' result (test.results) to a tab delimited file. - -<>= -write.xtail(test.results,"test_results.txt",quote=F,sep="\t") -@ - - -\section{Visualization} - -\subsection{plotFCs} - -In \verb'Xtail', the function \Rfunction{plotFCs} shows the result of the differential expression at the two expression levels, where each gene is a dot whose position is determined by its log2 fold change (log2FC) of transcriptional level (\verb'mRNA_log2FC'), represented on the x-axis, and the log2FC of translational level (\verb'RPF_log2FC'), represented on the y-axis (Figure \ref{fig:plotFCs}). The optional input parameter of \Rfunction{plotFCs} is \verb'log2FC.cutoff', a non-negative threshold value that will divide the genes into different classes: - -\begin{itemize} - \item \verb'blue': for genes whoes \verb'mRNA_log2FC' larger than \verb'log2FC.cutoff' (transcriptional level). - \item \verb'red': for genes whoes \verb'RPF_log2FC' larger than \verb'log2FC.cutoff' (translational level). - \item \verb'green': for genes changing homodirectionally at both level. - \item \verb'yellow': for genes changing antidirectionally at two levels. -\end{itemize} - - -<>= -plotFCs(test.results) -@ -\begin{figure}[h] -\centering -\includegraphics[width=.5\textwidth]{figure/plotFCs-1} -\caption{Scatter plot of log2 fold changes} -\label{fig:plotFCs} -\end{figure} - -Those genes in which the difference of \verb'mRNA_log2FC' and \verb'RPF_log2FC' did not exceed more than \verb'log2FC.cutoff' are excluded. The points will be color-coded with the \verb'pvalue_final' obtained with \verb'xtail' (more significant p values having darker color). By default the \verb'log2FC.cutoff' is 1. - - -\subsection{plotRs} - -Similar to \Rfunction{plotFCs}, the function \Rfunction{plotRs} shows the RPF-to-mRNA ratios in two conditions, where the position of each gene is determined by its RPF-to-mRNA ratio (log2R) in two conditions, represented on the x-axis and y-axis respectively (Figure \ref{fig:plotRs}). The optional input parameter \verb'log2R.cutoff' (non-negative threshold value) will divide the genes into different classes: -\begin{itemize} - \item \verb'blue': for genes whoes \verb'log2R' larger in first condition than second condition. - \item \verb'red': for genes whoes \verb'log2R' larger in second condition than the first condition. - \item \verb'green': for genes whoes \verb'log2R' changing homodirectionally in two condition. - \item \verb'yellow': for genes whoes \verb'log2R' changing antidirectionally in two conditon. -\end{itemize} - -<>= -plotRs(test.results) -@ -\begin{figure}[h] -\centering -\includegraphics[width=.5\textwidth]{figure/plotRs-1} -\caption{Scatter plot of log2 RPF-to-mRNA ratios} -\label{fig:plotRs} -\end{figure} - -Those genes in which the difference of \verb'log2R' in two conditions did not exceed more than \verb'log2R.cutoff' are excluded. The points will be color-coded with the \verb'pvalue_final' obtained with \verb'xtail' (more significant p values having darker color). By default the \verb'log2R.cutoff' is 1. - - -\subsection{volcanoPlot} - -It can also be useful to evaluate the fold changes cutoff and p values thresholds by looking at the volcano plot. A simple function for making this plot is \Rfunction{volcanoPlot}, in which the \verb'log2FC_TE_final' is plotted on the x-axis and the negative log10 \verb'pvalue_fianl' is plotted on the y-axis (Figure \ref{fig:volcanoplot}). - -<>= -volcanoPlot(test.results) -@ -\begin{figure}[h] -\centering -\includegraphics[width=.5\textwidth]{figure/volcanoPlot-1} -\caption{volcano plot.} -\label{fig:volcanoplot} -\end{figure} - - - -\section*{Session Info} - -<>= -sessionInfo() -@ - -\begin{thebibliography}{99} -\bibitem{DESeq2} -Love MI, Huber W, Anders S: \textsl{Moderated Estimation of Fold Change and Dispersion for RNA-Seq Data with DESeq2}. Genome Biology 2014, 15:550. -A Comparison of Methods: Normalizing High-Throughput RNA Sequencing Data. -\bibitem{Reddy} -Reddy R: \textsl{A Comparison of Methods: Normalizing High-Throughput RNA Sequencing Data. Cold Spring Harbor Labs Journals}. bioRxiv 2015:1-9. -\bibitem{PC3} -Hsieh AC, Liu Y, Edlind MP, et al.: \textsl{The translational landscape of mTOR signaling steers cancer initiation and metastasis}. Nature 2012, 485:55-61. -\end{thebibliography} - -\end{document} - diff --git a/inst/doc/xtail.pdf b/inst/doc/xtail.pdf deleted file mode 100755 index de715f6..0000000 Binary files a/inst/doc/xtail.pdf and /dev/null differ diff --git a/man/plotFCs.Rd b/man/plotFCs.Rd index 0e5809c..4db4173 100755 --- a/man/plotFCs.Rd +++ b/man/plotFCs.Rd @@ -1,39 +1,39 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plot-fc.R \name{plotFCs} \alias{plotFCs} -\docType{package} -\title{ -Scatter plot of the log2 fold changes of mRNA and RPF -} -\description{ -A simple function that plots the log2 fold changes of mRNA and RPF -} +\alias{plotFCs,xtail-method} +\title{Scatterplot of fold changes.} \usage{ -plotFCs(object, log2FC.cutoff = 1, cex=1, xlim=NA, ylim=NA, ...) +plotFCs(object, ...) + +\S4method{plotFCs}{xtail}(object, log2FC.cutoff = 1, xlim = NULL, ylim = NULL) } \arguments{ - \item{object}{a object that generated by \code{xtail} function} - \item{log2FC.cutoff}{the threshold of log2 fold change of mRNA and RPF} - \item{cex}{cex} - \item{xlim}{the bound for points on the x-axis} - \item{ylim}{the bound for points on the y-axis} - \item{...}{further arguments to 'plot'} +\item{object}{a \code{xtail} object} + +\item{...}{optional arguments. Currently not used} + +\item{log2FC.cutoff}{cutoff for categorizing results} + +\item{xlim}{argument for limiting the range plotted on the x axis. See +\code{\link[ggplot2:scale_continuous]{scale_continuous}} for more +details.} + +\item{ylim}{argument for limiting the range plotted on the y axis. See +\code{\link[ggplot2:scale_continuous]{scale_continuous}} for more +details.} } -\examples{ - #load data - data(xtaildata) - #Get the mrna count data and rpf count data - test.mrna <- xtaildata$mrna - test.rpf <- xtaildata$rpf - #Assign condition labels to samples. - condition <- c("control","control","treat","treat") - #run xtail - test.results <- xtail(test.mrna,test.rpf,condition) - #log2 fold changes plot - plotFCs(test.results) +\value{ +a \code{ggplot} object } -\author{ -Zhengtao xiao +\description{ +A simple function that plots the fold change in mRNA and RPF. +Different classes of genes are labeled with different of color. +Each dot, representing a particular gene, are color coded by the category of +expression change. } -\references{ -Zhengtao Xiao, Qin Zou, Yu Liu, and Xuerui Yang: Genome-wide assessment of differential translations with ribosome profiling data. +\examples{ +data(xtailres) +plotFCs(xtailres) } diff --git a/man/plotRs.Rd b/man/plotRs.Rd index 8f4903b..5b036ba 100755 --- a/man/plotRs.Rd +++ b/man/plotRs.Rd @@ -1,39 +1,39 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plot-r.R \name{plotRs} \alias{plotRs} -\docType{package} -\title{ -Scatter plot of the log2 RPF-to-mRNA ratios -} -\description{ -A simple function that plots the log2 RPF-to-mRNA ratios in two condition -} +\alias{plotRs,xtail-method} +\title{Scatterplot of log2 RPF-to-mRNA ratios.} \usage{ -plotRs(object, log2R.cutoff = 1, cex=1, xlim=NA, ylim=NA, ...) +plotRs(object, ...) + +\S4method{plotRs}{xtail}(object, log2R.cutoff = 1, xlim = NULL, ylim = NULL) } \arguments{ - \item{object}{a object that generated by \code{xtail} function} - \item{log2R.cutoff}{the threshold of log2 RPF-to-mRNA ratios} - \item{cex}{cex} - \item{xlim}{the bound for points on the x-axis} - \item{ylim}{the bound for points on the y-axis} - \item{...}{further arguments to 'plot'} +\item{object}{a \code{xtail} object} + +\item{...}{optional arguments. Currently not used} + +\item{log2R.cutoff}{cutoff for categorizing results} + +\item{xlim}{argument for limiting the range plotted on the x axis. See +\code{\link[ggplot2:scale_continuous]{scale_continuous}} for more +details.} + +\item{ylim}{argument for limiting the range plotted on the y axis. See +\code{\link[ggplot2:scale_continuous]{scale_continuous}} for more +details.} } -\examples{ - #load data - data(xtaildata) - #Get the mrna count data and rpf count data - test.mrna <- xtaildata$mrna - test.rpf <- xtaildata$rpf - #Assign condition labels to samples. - condition <- c("control","control","treat","treat") - #run xtail - test.results <- xtail(test.mrna,test.rpf,condition) - #log2 RPF-to-mRNA ratios plot - plotRs(test.results) +\value{ +a \code{ggplot} object } -\author{ -Zhengtao xiao +\description{ +A simple function that plots the log2 RPF-to-mRNA ratios in two conditions. +Different classes of genes are labeled with different of color. +Each dot, representing a particular gene, are color coded by the category of +expression change. } -\references{ -Zhengtao Xiao, Qin Zou, Yu Liu, and Xuerui Yang: Genome-wide assessment of differential translations with ribosome profiling data. +\examples{ +data(xtailres) +plotRs(xtailres) } diff --git a/man/resultsTable.Rd b/man/resultsTable.Rd deleted file mode 100755 index 90f6f1b..0000000 --- a/man/resultsTable.Rd +++ /dev/null @@ -1,39 +0,0 @@ -\name{resultsTable} -\alias{resultsTable} -\docType{package} -\title{ -Extract the results table from the object generated by xtail -} -\description{ -This function extracts the results of the xtail -} -\usage{ -resultsTable(object,sort.by="pvalue.adjust", log2FCs = FALSE, log2Rs = FALSE) -} -\arguments{ - \item{object}{a object that generated by \code{xtail} function} - \item{sort.by}{the column name that used for sorting the result table, such as "log2FC_TE_v1", "log2FC_TE_v2" ,"pvalue_v1" , "pvalue_v2", "pvalue_final","pvalue.adjust"} - \item{log2FCs}{report the "mRNA_log2FC" and "PRF_log2FC" of first pipline of \code{xtail}} - \item{log2Rs}{report the log2 Ratios of second pipline of \code{xtail}} -} -\examples{ - #load data - data(xtaildata) - #Get the mrna count data and rpf count data - test.mrna <- xtaildata$mrna - test.rpf <- xtaildata$rpf - #Assign condition labels to samples. - condition <- c("control","control","treat","treat") - #run xtail - test.results <- xtail(test.mrna,test.rpf,condition) - #extract the results table - test.tab <- resultsTable(test.results) - #write the results - write.table(test.tab,"test_results.txt",quote=F,sep="\t") -} -\author{ -Zhengtao xiao -} -\references{ -Zhengtao Xiao, Qin Zou, Yu Liu, and Xuerui Yang: Genome-wide assessment of differential translations with ribosome profiling data. -} diff --git a/man/runXtail.Rd b/man/runXtail.Rd new file mode 100644 index 0000000..78b4515 --- /dev/null +++ b/man/runXtail.Rd @@ -0,0 +1,63 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/runXtail.R +\name{runXtail} +\alias{runXtail} +\alias{runXtail,SummarizedExperiment-method} +\alias{addXtail} +\alias{addXtail,SummarizedExperiment-method} +\title{xtail analysis wrapper for \code{SummarizedExperiment} objects} +\usage{ +runXtail(x, mrna_assay, rpf_assay, ...) + +\S4method{runXtail}{SummarizedExperiment}(x, mrna_assay, rpf_assay, ...) + +addXtail(x, ...) + +\S4method{addXtail}{SummarizedExperiment}(x, ...) +} +\arguments{ +\item{x}{a \code{RpfSummarizedExperiment} object} + +\item{mrna_assay}{a scalar character. The name of the \code{assay} containing +the \code{mRNA} data.} + +\item{rpf_assay}{a scalar character. The name of the \code{assay} containing +the \code{rpf} data.} + +\item{...}{\itemize{ +\item{For \code{runXtail}: additional parameters passed on to +\code{\link[=xtail]{xtail}}.} +\item{For \code{addXtail}: additional parameters passed on to +\code{runXtail} and \code{\link[=xtail]{xtail}}.} +}} +} +\value{ +A \code{\link[=xtail]{xtail}} object for \code{runXtail} or +an object of \code{class(x)}. +} +\description{ +The \code{\link[=xtail]{xtail}} function can be used directly with +\code{\link[SummarizedExperiment:SummarizedExperiment-class]{SummarizedExperiment}} +objects. The \code{mrna} and \code{rpf} data must be stored as two separate +assays. +} +\details{ +See \code{\link[=xtail]{xtail}} for more details on the analysis +function. +} +\examples{ +data(xtaildata) +test.mrna <- xtaildata$mrna[1:100,] +test.rpf <- xtaildata$rpf[1:100,] +condition <- c("control","control","treat","treat") + +se <- SummarizedExperiment(assays = list(mrna = test.mrna, rpf = test.rpf), + colData = DataFrame(condition = condition)) +xtail <- runXtail(se, "mrna", "rpf", condition = colData(se)$condition, + bins = 1000, threads = 2) +xtail + +se <- addXtail(se, "mrna", "rpf", condition = colData(se)$condition, + bins = 1000, threads = 2) +rowData(se) +} diff --git a/man/summary.Rd b/man/summary.Rd deleted file mode 100755 index cb3bc59..0000000 --- a/man/summary.Rd +++ /dev/null @@ -1,35 +0,0 @@ -\name{summary} -\alias{summary} -\docType{package} -\title{ -Summarize Xtail results -} -\description{ -Print a summary of the results from a xtail analysis. -} -\usage{ -summary(object,alpha) -} -\arguments{ - \item{object}{a object that generated by \code{xtail} function} - \item{alpha}{the adjusted p-value cutoff, default is 0.1} -} -\examples{ - #load data - data(xtaildata) - #Get the mrna count data and rpf count data - test.mrna <- xtaildata$mrna - test.rpf <- xtaildata$rpf - #Assign condition labels to samples. - condition <- c("control","control","treat","treat") - #run xtail - test.results <- xtail(test.mrna,test.rpf,condition) - #print the summary of the xtail results - summary(test.results) -} -\author{ -Zhengtao xiao -} -\references{ -Zhengtao Xiao, Qin Zou, Yu Liu, and Xuerui Yang: Genome-wide assessment of differential translations with ribosome profiling data. -} diff --git a/man/volcanoPlot.Rd b/man/volcanoPlot.Rd index ce5b214..b6499d2 100755 --- a/man/volcanoPlot.Rd +++ b/man/volcanoPlot.Rd @@ -1,34 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plot-volcano.R \name{volcanoPlot} \alias{volcanoPlot} -\docType{package} -\title{ -Scatter plot of the log2 fold changes of TE and p-values -} -\description{ -A simple function that plots the log2 fold changes of TE and negative log10 p-valules -} +\alias{volcanoPlot,xtail-method} +\title{Volcano plot} \usage{ -volcanoPlot(object) +volcanoPlot(object, ...) + +\S4method{volcanoPlot}{xtail}(object) } \arguments{ - \item{object}{a object that generated by \code{xtail} function} +\item{object}{a \code{xtail} object} + +\item{...}{optional arguments. Currently not used} } -\examples{ - #load data - data(xtaildata) - #Get the mrna count data and rpf count data - test.mrna <- xtaildata$mrna - test.rpf <- xtaildata$rpf - #Assign condition labels to samples. - condition <- c("control","control","treat","treat") - #run xtail - test.results <- xtail(test.mrna,test.rpf,condition) - #log2 fold changes plot - volcanoPlot(test.results) +\value{ +a \code{ggplot} object } -\author{ -Zhengtao xiao +\description{ +A simple function that plots log2 fold change of TE and -log10 of pvalues +The genes are color coded by P-value (-log10) } -\references{ -Zhengtao Xiao, Qin Zou, Yu Liu, and Xuerui Yang: Genome-wide assessment of differential translations with ribosome profiling data. +\examples{ +data(xtailres) +volcanoPlot(xtailres) } diff --git a/man/write.xtail.Rd b/man/write.xtail.Rd index 3172628..b56ac3d 100755 --- a/man/write.xtail.Rd +++ b/man/write.xtail.Rd @@ -1,38 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/xtail-class.R \name{write.xtail} \alias{write.xtail} -\docType{package} -\title{ -Write the xtail results table to file -} -\description{ -A simple function that write the xtail results to a file -} +\title{Write xtail results as table} \usage{ -write.xtail(object,sort.by="pvalue.adjust", log2FCs = FALSE, log2Rs = FALSE, ...) +write.xtail(object, ...) } \arguments{ - \item{object}{a object that generated by \code{xtail} function} - \item{sort.by}{the column name that used for sorting the result table, such as "log2FC_TE_v1", "log2FC_TE_v2" ,"pvalue_v1" , "pvalue_v2", "pvalue_final","pvalue.adjust"} - \item{log2FCs}{report the "mRNA_log2FC" and "PRF_log2FC" of first pipline of \code{xtail}} - \item{log2Rs}{report the log2 Ratios of second pipline of \code{xtail}} - \item{...}{further arguments to 'write.table'} +\item{object}{a \code{xtail} object} + +\item{...}{arguments passed onto \code{\link[=resultsTable]{resultsTable}} +or \code{write.table}. Should be named arguments to avoid clashes.} } -\examples{ - #load data - data(xtaildata) - #Get the mrna count data and rpf count data - test.mrna <- xtaildata$mrna - test.rpf <- xtaildata$rpf - #Assign condition labels to samples. - condition <- c("control","control","treat","treat") - #run xtail - test.results <- xtail(test.mrna,test.rpf,condition) - #write the results to file - write.xtail(test.results,"test_results.txt",quote=F,sep="\t") +\value{ +invisible result from \code{write.table} } -\author{ -Zhengtao xiao +\description{ +xtail results can be directly written to file using \code{write.xtail} +} +\examples{ +data(xtailres) +write.xtail(xtailres, file = tempfile()) } -\references{ -Zhengtao Xiao, Qin Zou, Yu Liu, and Xuerui Yang: Genome-wide assessment of differential translations with ribosome profiling data. +\seealso{ +\code{\link[utils:write.table]{write.table}} } diff --git a/man/xtail-accessors.Rd b/man/xtail-accessors.Rd new file mode 100644 index 0000000..f46d7bd --- /dev/null +++ b/man/xtail-accessors.Rd @@ -0,0 +1,59 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/xtail-class.R +\name{xtail-accessors} +\alias{xtail-accessors} +\alias{resultsTable} +\alias{conditions,xtail-method} +\alias{resultsNum} +\alias{resultsNum,xtail-method} +\alias{resultsTable,xtail-method} +\alias{summary} +\alias{summary.xtail} +\title{Results table of xtail results} +\usage{ +\S4method{conditions}{xtail}(object) + +resultsNum(object, ...) + +\S4method{resultsNum}{xtail}(object, ...) + +resultsTable(object, ...) + +\S4method{resultsTable}{xtail}(object, sort.by = NULL, log2FCs = FALSE, log2Rs = FALSE, ...) + +summary(object, ...) + +\method{summary}{xtail}(object, alpha = 0.1, ...) +} +\arguments{ +\item{object}{a \code{xtail} object} + +\item{...}{optional arguments. Currently not used} + +\item{sort.by}{the column to sort with. Defaults to \code{NULL} to disable +sorting.} + +\item{log2FCs}{\code{TRUE} or \code{FALSE}: Should log2 fold change values be +returned? (defaults to \code{TRUE})} + +\item{log2Rs}{\code{TRUE} or \code{FALSE}: Should log2 ratio values be +returned? (defaults to \code{TRUE})} + +\item{alpha}{cut off for summarizing results. Only results with a adjusted +p-value lower than \code{alpha} will be reported.} +} +\value{ +a \code{DataFrame} with the results or numeric vectors +} +\description{ +To retrieve the results from the xtail run use one of the accessor functions. +} +\examples{ +data(xtailres) +resultsTable(xtailres) +conditions(xtailres) +resultsNum(xtailres) + +# sorting or results +resultsTable(xtailres, sort.by = "pvalue.adjust") +} diff --git a/man/xtail-package.Rd b/man/xtail-package.Rd new file mode 100644 index 0000000..16cc268 --- /dev/null +++ b/man/xtail-package.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/xtail-package.R +\docType{package} +\name{xtail-package} +\alias{xtail-package} +\title{A tool to quantitatively assess differential translations with ribosome profiling data.} +\description{ +By pairwise comparisons of ribosome profiling data, Xtail +identifies differentially translated genes across two experimental or +physiological conditions. +} +\references{ +Zhengtao Xiao, Qin Zou, Yu Liu, and Xuerui Yang: Genome-wide +assessment of differential translations with ribosome profiling data. +} +\author{ +Zhengtao xiao +} diff --git a/man/xtail.Rd b/man/xtail.Rd index 0d9bcb1..2efe4f4 100755 --- a/man/xtail.Rd +++ b/man/xtail.Rd @@ -1,50 +1,104 @@ -\name{Xtail} +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/xtail-class.R, R/xtail.R +\docType{class} +\name{xtail-class} +\alias{xtail-class} \alias{xtail} -\docType{package} -\title{ -A tool to quantitatively assess differential translations with ribosome profiling data. -} -\description{ -By pairwise comparisons of ribosome profiling data, Xtail identifies differentially translated genes across two experimental or physiological conditions. -} +\title{A tool to quantitatively assess differential translations with ribosome profiling data.} \usage{ -xtail(mrna,rpf,condition,baseLevel=NA,minMeanCount=1,...) +xtail( + mrna, + rpf, + condition, + baseLevel = NA, + minMeanCount = 1, + normalize = TRUE, + p.adjust.method = "BH", + threads = NA, + bins = 10000L, + ci = 0 +) } \arguments{ - \item{mrna}{a matrix or data frame of raw mRNA count data whose rows correspond to genes and columns correspond to samples. The column names should be non-empty, and in same order with condition.} - \item{rpf}{a matrix or data frame of raw RPF count data whose rows correspond to genes and columns correspond to samples.The column names should be non-empty, and in same order with condition.} - \item{condition}{condition labels corresponding to the order of samples in mrna and rpf. There must be exactly two unique values.} - \item{baseLevel}{The baseLevel indicates which one of the two conditions will be compared against by the other one. If not specified, \code{Xtail} will return results of comparing the second condition over the first one.} - \item{minMeanCount}{\code{Xtail} uses the average expression level of each gene, across all samples as filter criterion and it omits all genes with mean counts below minMeanCount.} - \item{ci}{The level of confindence to get credible intervals of log2 fold change of translational efficiency (TE), for example 0.95.} - \item{normalize}{Whether normalization should be done (TRUE \\ FALSE). If missing, \code{Xtail} will perform median-of-ratios normlazation by default.} - \item{method.adjust}{The method to use for adjusting multiple comparisons, by default "BH", see \code{?p.adjust} } - \item{threads}{The number of CPU cores used. By default, all available cores are used.} - \item{bins}{The number of bins used for calculating the probability density of log2FC or log2R (default is 10000). This paramater will determine accuracy of pvalue. Set it small for a very quick test run.} +\item{mrna}{a matrix or data frame of raw mRNA count data whose rows +correspond to genes and columns correspond to samples. The column names +should be non-empty, and in same order with condition.} + +\item{rpf}{a matrix or data frame of raw RPF count data whose rows correspond +to genes and columns correspond to samples.The column names should be +non-empty, and in same order with condition.} + +\item{condition}{condition labels corresponding to the order of samples in +mrna and rpf. There must be exactly two unique values.} + +\item{baseLevel}{The baseLevel indicates which one of the two conditions will +be compared against by the other one. If not specified, \code{Xtail} will +return results of comparing the second condition over the first one.} + +\item{minMeanCount}{\code{Xtail} uses the average expression level of each +gene, across all samples as filter criterion and it omits all genes with +mean counts below minMeanCount.} + +\item{normalize}{Whether normalization should be done (TRUE \\ FALSE). If +missing, \code{Xtail} will perform median-of-ratios normlazation by +default.} + +\item{p.adjust.method}{The method to use for adjusting multiple comparisons, by +default "BH", see \code{?p.adjust}} + +\item{threads}{The number of CPU cores used. By default, all available cores +are used.} + +\item{bins}{The number of bins used for calculating the probability density +of log2FC or log2R (default is 10000). This paramater will determine +accuracy of pvalue. Set it small for a very quick test run.} + +\item{ci}{The level of confindence to get credible intervals of log2 fold +change of translational efficiency (TE), for example 0.95.} +} +\value{ +a \code{xtail} object +} +\description{ +By pairwise comparisons of ribosome profiling data, Xtail +identifies differentially translated genes across two experimental or +physiological conditions. } \details{ - No missing values are allowed in input data mrna and rpf. +No missing values are allowed in input data mrna and rpf. - Duplicate row names (gene names or gene ids) are not allowed. +Duplicate row names (gene names or gene ids) are not allowed. - \code{Xtail} takes in raw read counts of RPF and mRNA, and performs median-of-ratios normalization. Alternatively, users can provide normalized read counts and skip the built-in normal by setting "normalize" to FALSE. +\code{Xtail} takes in raw read counts of RPF and mRNA, and performs +median-of-ratios normalization. Alternatively, users can provide normalized +read counts and skip the built-in normal by setting "normalize" to FALSE. - The step of estimation of the probability distributions, for log2FC or log2R, will execute slowly in the current implementation, but can be speeded up by running on multiple cores using the parallel library. By default, the "detectCores" function in parallel library is used to determine the number of CPU cores in the machine on which R is running. To adjust the number of cores used, use "threads" argument to assign. +The step of estimation of the probability distributions, for log2FC or +log2R, will execute slowly in the current implementation, but can be +speeded up by running on multiple cores using the parallel library. By +default, the "detectCores" function in parallel library is used to +determine the number of CPU cores in the machine on which R is running. To +adjust the number of cores used, use "threads" argument to assign. } \examples{ - #load the data - data(xtaildata) - #Get the mrna count data and rpf count data - test.mrna <- xtaildata$mrna - test.rpf <- xtaildata$rpf - #Assign condition labels to samples. - condition <- c("control","control","treat","treat") - #run xtail - test.results <- xtail(test.mrna,test.rpf,condition) +#load the data +data(xtaildata) +# Get the mrna count data and rpf count data. For the example only the first +# 100 are used +test.mrna <- xtaildata$mrna[1:100,] +test.rpf <- xtaildata$rpf[1:100,] + +#Assign condition labels to samples. +condition <- c("control","control","treat","treat") + +#run xtail +test.results <- xtail(test.mrna,test.rpf,condition, threads = 2) +test.results +} +\references{ +Zhengtao Xiao, Qin Zou, Yu Liu, and Xuerui Yang: Genome-wide +assessment of differential translations with ribosome profiling data. } \author{ Zhengtao xiao } -\references{ -Zhengtao Xiao, Qin Zou, Yu Liu, and Xuerui Yang: Genome-wide assessment of differential translations with ribosome profiling data. -} diff --git a/man/xtaildata.Rd b/man/xtaildata.Rd index fc0f8bd..e05e58f 100755 --- a/man/xtaildata.Rd +++ b/man/xtaildata.Rd @@ -1,13 +1,21 @@ -\name{testdata} -\alias{testdata} +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/xtail-package.R \docType{data} -\title{Sample mRNA and RPF data} -\description{ - Sample mRNA and RPF data for 11391 genes and 4 samples. -} -\usage{data(testdata)} +\name{xtaildata} +\alias{xtaildata} +\alias{xtailres} +\title{xtail example data} \format{ - A list containing two data frames, mrna that has counts data of mRNA samples and rpf that has counts data of RPF samples. +a list of matrices + +an example results } -\keyword{datasets} +\usage{ +data(xtaildata) +data(xtailres) +} +\description{ +The \code{xtail} package includes some example data +} +\keyword{datasets} diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp old mode 100644 new mode 100755 index 487ee9b..0b00080 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -1,4 +1,4 @@ -// This file was generated by Rcpp::compileAttributes +// Generated by using Rcpp::compileAttributes() -> do not edit by hand // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 #include @@ -6,12 +6,17 @@ using namespace Rcpp; +#ifdef RCPP_USE_GLOBAL_ROSTREAM +Rcpp::Rostream& Rcpp::Rcout = Rcpp::Rcpp_cout_get(); +Rcpp::Rostream& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get(); +#endif + // fitBeta2 Rcpp::List fitBeta2(SEXP ySEXP, SEXP xSEXP, SEXP nfSEXP, SEXP alpha_hatSEXP, SEXP contrastSEXP, SEXP beta_matSEXP, SEXP lambdaSEXP, SEXP tolSEXP, SEXP maxitSEXP, SEXP useQRSEXP); -RcppExport SEXP xtail_fitBeta2(SEXP ySEXPSEXP, SEXP xSEXPSEXP, SEXP nfSEXPSEXP, SEXP alpha_hatSEXPSEXP, SEXP contrastSEXPSEXP, SEXP beta_matSEXPSEXP, SEXP lambdaSEXPSEXP, SEXP tolSEXPSEXP, SEXP maxitSEXPSEXP, SEXP useQRSEXPSEXP) { +RcppExport SEXP _xtail_fitBeta2(SEXP ySEXPSEXP, SEXP xSEXPSEXP, SEXP nfSEXPSEXP, SEXP alpha_hatSEXPSEXP, SEXP contrastSEXPSEXP, SEXP beta_matSEXPSEXP, SEXP lambdaSEXPSEXP, SEXP tolSEXPSEXP, SEXP maxitSEXPSEXP, SEXP useQRSEXPSEXP) { BEGIN_RCPP - Rcpp::RObject __result; - Rcpp::RNGScope __rngScope; + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< SEXP >::type ySEXP(ySEXPSEXP); Rcpp::traits::input_parameter< SEXP >::type xSEXP(xSEXPSEXP); Rcpp::traits::input_parameter< SEXP >::type nfSEXP(nfSEXPSEXP); @@ -22,59 +27,59 @@ BEGIN_RCPP Rcpp::traits::input_parameter< SEXP >::type tolSEXP(tolSEXPSEXP); Rcpp::traits::input_parameter< SEXP >::type maxitSEXP(maxitSEXPSEXP); Rcpp::traits::input_parameter< SEXP >::type useQRSEXP(useQRSEXPSEXP); - __result = Rcpp::wrap(fitBeta2(ySEXP, xSEXP, nfSEXP, alpha_hatSEXP, contrastSEXP, beta_matSEXP, lambdaSEXP, tolSEXP, maxitSEXP, useQRSEXP)); - return __result; + rcpp_result_gen = Rcpp::wrap(fitBeta2(ySEXP, xSEXP, nfSEXP, alpha_hatSEXP, contrastSEXP, beta_matSEXP, lambdaSEXP, tolSEXP, maxitSEXP, useQRSEXP)); + return rcpp_result_gen; END_RCPP } // probDensity NumericVector probDensity(NumericVector k, NumericVector alpha, NumericVector intercept, NumericVector sf, NumericVector betas, NumericVector cond); -RcppExport SEXP xtail_probDensity(SEXP kSEXP, SEXP alphaSEXP, SEXP interceptSEXP, SEXP sfSEXP, SEXP betasSEXP, SEXP condSEXP) { +RcppExport SEXP _xtail_probDensity(SEXP kSEXP, SEXP alphaSEXP, SEXP interceptSEXP, SEXP sfSEXP, SEXP betasSEXP, SEXP condSEXP) { BEGIN_RCPP - Rcpp::RObject __result; - Rcpp::RNGScope __rngScope; + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< NumericVector >::type k(kSEXP); Rcpp::traits::input_parameter< NumericVector >::type alpha(alphaSEXP); Rcpp::traits::input_parameter< NumericVector >::type intercept(interceptSEXP); Rcpp::traits::input_parameter< NumericVector >::type sf(sfSEXP); Rcpp::traits::input_parameter< NumericVector >::type betas(betasSEXP); Rcpp::traits::input_parameter< NumericVector >::type cond(condSEXP); - __result = Rcpp::wrap(probDensity(k, alpha, intercept, sf, betas, cond)); - return __result; + rcpp_result_gen = Rcpp::wrap(probDensity(k, alpha, intercept, sf, betas, cond)); + return rcpp_result_gen; END_RCPP } // probMatrix Rcpp::NumericVector probMatrix(Rcpp::NumericVector x, Rcpp::NumericVector y, Rcpp::NumericVector side); -RcppExport SEXP xtail_probMatrix(SEXP xSEXP, SEXP ySEXP, SEXP sideSEXP) { +RcppExport SEXP _xtail_probMatrix(SEXP xSEXP, SEXP ySEXP, SEXP sideSEXP) { BEGIN_RCPP - Rcpp::RObject __result; - Rcpp::RNGScope __rngScope; + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< Rcpp::NumericVector >::type x(xSEXP); Rcpp::traits::input_parameter< Rcpp::NumericVector >::type y(ySEXP); Rcpp::traits::input_parameter< Rcpp::NumericVector >::type side(sideSEXP); - __result = Rcpp::wrap(probMatrix(x, y, side)); - return __result; + rcpp_result_gen = Rcpp::wrap(probMatrix(x, y, side)); + return rcpp_result_gen; END_RCPP } // probMatrixCI NumericVector probMatrixCI(NumericVector x, NumericVector y, NumericVector side, NumericVector ci); -RcppExport SEXP xtail_probMatrixCI(SEXP xSEXP, SEXP ySEXP, SEXP sideSEXP, SEXP ciSEXP) { +RcppExport SEXP _xtail_probMatrixCI(SEXP xSEXP, SEXP ySEXP, SEXP sideSEXP, SEXP ciSEXP) { BEGIN_RCPP - Rcpp::RObject __result; - Rcpp::RNGScope __rngScope; + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< NumericVector >::type x(xSEXP); Rcpp::traits::input_parameter< NumericVector >::type y(ySEXP); Rcpp::traits::input_parameter< NumericVector >::type side(sideSEXP); Rcpp::traits::input_parameter< NumericVector >::type ci(ciSEXP); - __result = Rcpp::wrap(probMatrixCI(x, y, side, ci)); - return __result; + rcpp_result_gen = Rcpp::wrap(probMatrixCI(x, y, side, ci)); + return rcpp_result_gen; END_RCPP } // boundFC NumericVector boundFC(NumericVector k1, NumericVector alpha1, NumericVector intercept1, NumericVector sf1, NumericVector FC1, NumericVector cond1, NumericVector k2, NumericVector alpha2, NumericVector intercept2, NumericVector sf2, NumericVector FC2, NumericVector cond2, NumericVector stepFC, NumericVector toleration); -RcppExport SEXP xtail_boundFC(SEXP k1SEXP, SEXP alpha1SEXP, SEXP intercept1SEXP, SEXP sf1SEXP, SEXP FC1SEXP, SEXP cond1SEXP, SEXP k2SEXP, SEXP alpha2SEXP, SEXP intercept2SEXP, SEXP sf2SEXP, SEXP FC2SEXP, SEXP cond2SEXP, SEXP stepFCSEXP, SEXP tolerationSEXP) { +RcppExport SEXP _xtail_boundFC(SEXP k1SEXP, SEXP alpha1SEXP, SEXP intercept1SEXP, SEXP sf1SEXP, SEXP FC1SEXP, SEXP cond1SEXP, SEXP k2SEXP, SEXP alpha2SEXP, SEXP intercept2SEXP, SEXP sf2SEXP, SEXP FC2SEXP, SEXP cond2SEXP, SEXP stepFCSEXP, SEXP tolerationSEXP) { BEGIN_RCPP - Rcpp::RObject __result; - Rcpp::RNGScope __rngScope; + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< NumericVector >::type k1(k1SEXP); Rcpp::traits::input_parameter< NumericVector >::type alpha1(alpha1SEXP); Rcpp::traits::input_parameter< NumericVector >::type intercept1(intercept1SEXP); @@ -89,16 +94,16 @@ BEGIN_RCPP Rcpp::traits::input_parameter< NumericVector >::type cond2(cond2SEXP); Rcpp::traits::input_parameter< NumericVector >::type stepFC(stepFCSEXP); Rcpp::traits::input_parameter< NumericVector >::type toleration(tolerationSEXP); - __result = Rcpp::wrap(boundFC(k1, alpha1, intercept1, sf1, FC1, cond1, k2, alpha2, intercept2, sf2, FC2, cond2, stepFC, toleration)); - return __result; + rcpp_result_gen = Rcpp::wrap(boundFC(k1, alpha1, intercept1, sf1, FC1, cond1, k2, alpha2, intercept2, sf2, FC2, cond2, stepFC, toleration)); + return rcpp_result_gen; END_RCPP } // xtail_test NumericVector xtail_test(NumericVector k1, NumericVector k2, NumericVector ints1, NumericVector ints2, NumericVector log2FC_1, NumericVector log2FC_2, NumericVector disper1, NumericVector disper2, NumericVector sf1, NumericVector sf2, NumericVector cond1, NumericVector cond2, IntegerVector bin, NumericVector ci); -RcppExport SEXP xtail_xtail_test(SEXP k1SEXP, SEXP k2SEXP, SEXP ints1SEXP, SEXP ints2SEXP, SEXP log2FC_1SEXP, SEXP log2FC_2SEXP, SEXP disper1SEXP, SEXP disper2SEXP, SEXP sf1SEXP, SEXP sf2SEXP, SEXP cond1SEXP, SEXP cond2SEXP, SEXP binSEXP, SEXP ciSEXP) { +RcppExport SEXP _xtail_xtail_test(SEXP k1SEXP, SEXP k2SEXP, SEXP ints1SEXP, SEXP ints2SEXP, SEXP log2FC_1SEXP, SEXP log2FC_2SEXP, SEXP disper1SEXP, SEXP disper2SEXP, SEXP sf1SEXP, SEXP sf2SEXP, SEXP cond1SEXP, SEXP cond2SEXP, SEXP binSEXP, SEXP ciSEXP) { BEGIN_RCPP - Rcpp::RObject __result; - Rcpp::RNGScope __rngScope; + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< NumericVector >::type k1(k1SEXP); Rcpp::traits::input_parameter< NumericVector >::type k2(k2SEXP); Rcpp::traits::input_parameter< NumericVector >::type ints1(ints1SEXP); @@ -113,7 +118,22 @@ BEGIN_RCPP Rcpp::traits::input_parameter< NumericVector >::type cond2(cond2SEXP); Rcpp::traits::input_parameter< IntegerVector >::type bin(binSEXP); Rcpp::traits::input_parameter< NumericVector >::type ci(ciSEXP); - __result = Rcpp::wrap(xtail_test(k1, k2, ints1, ints2, log2FC_1, log2FC_2, disper1, disper2, sf1, sf2, cond1, cond2, bin, ci)); - return __result; + rcpp_result_gen = Rcpp::wrap(xtail_test(k1, k2, ints1, ints2, log2FC_1, log2FC_2, disper1, disper2, sf1, sf2, cond1, cond2, bin, ci)); + return rcpp_result_gen; END_RCPP } + +static const R_CallMethodDef CallEntries[] = { + {"_xtail_fitBeta2", (DL_FUNC) &_xtail_fitBeta2, 10}, + {"_xtail_probDensity", (DL_FUNC) &_xtail_probDensity, 6}, + {"_xtail_probMatrix", (DL_FUNC) &_xtail_probMatrix, 3}, + {"_xtail_probMatrixCI", (DL_FUNC) &_xtail_probMatrixCI, 4}, + {"_xtail_boundFC", (DL_FUNC) &_xtail_boundFC, 14}, + {"_xtail_xtail_test", (DL_FUNC) &_xtail_xtail_test, 14}, + {NULL, NULL, 0} +}; + +RcppExport void R_init_xtail(DllInfo *dll) { + R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); + R_useDynamicSymbols(dll, FALSE); +} diff --git a/src/fitBeta2.cpp b/src/fitBeta2.cpp index 05dd1d6..16b9679 100755 --- a/src/fitBeta2.cpp +++ b/src/fitBeta2.cpp @@ -81,7 +81,7 @@ Rcpp::List fitBeta2(SEXP ySEXP, SEXP xSEXP, SEXP nfSEXP, SEXP alpha_hatSEXP, SEX big_z = arma::zeros(y_m + x_p); z = arma::log(mu_hat / nfrow) + (yrow - mu_hat) / mu_hat; big_z(arma::span(0,y_m - 1)) = z; - // IRLS with Q matrix for X + // IRLS with Q matrix for X gamma_hat = q.t() * sqrt(big_w) * big_z; solve(beta_hat, r, gamma_hat); if (sum(abs(beta_hat) > large) > 0) { @@ -152,10 +152,10 @@ Rcpp::List fitBeta2(SEXP ySEXP, SEXP xSEXP, SEXP nfSEXP, SEXP alpha_hatSEXP, SEX alpha_mu[c] *= alpha_hat.row(i)[c]; } w = diagmat(mu_hat/(1.0 + alpha_mu)); - hat_matrix = sqrt(w) * x * (x.t() * w * x + ridge).i(true) * x.t() * sqrt(w); + hat_matrix = sqrt(w) * x * (x.t() * w * x + ridge).i() * x.t() * sqrt(w); hat_diagonals.row(i) = diagvec(hat_matrix).t(); // sigma is the covariance matrix for the betas - sigma = (x.t() * w * x + ridge).i(true) * x.t() * w * x * (x.t() * w * x + ridge).i(true); + sigma = (x.t() * w * x + ridge).i() * x.t() * w * x * (x.t() * w * x + ridge).i(); contrast_num.row(i) = contrast.t() * beta_hat; contrast_denom.row(i) = sqrt(contrast.t() * sigma * contrast); beta_var_mat.row(i) = diagvec(sigma).t(); @@ -169,4 +169,4 @@ Rcpp::List fitBeta2(SEXP ySEXP, SEXP xSEXP, SEXP nfSEXP, SEXP alpha_hatSEXP, SEX Rcpp::Named("contrast_denom",contrast_denom), Rcpp::Named("deviance",deviance)); } - + diff --git a/src/pdfTest.cpp b/src/pdfTest.cpp index f4eaec5..b6b0360 100755 --- a/src/pdfTest.cpp +++ b/src/pdfTest.cpp @@ -31,11 +31,11 @@ NumericVector cond){ // [[Rcpp::export]] Rcpp::NumericVector probMatrix(Rcpp::NumericVector x, Rcpp::NumericVector y, Rcpp::NumericVector side){ - // Xtail generates a joint probability matrix by multiplying two probability density dsitributions. - // This return pvalue, + // Xtail generates a joint probability matrix by multiplying two probability density dsitributions. + // This return pvalue, double pmax = 0; - int pmaxi = 0; //max probability index + int pmaxi = 0; //max probability index double drecord = 0; double s = 0; // 2*s is final pvalue NumericVector ret(2); @@ -122,14 +122,14 @@ Rcpp::NumericVector probMatrix(Rcpp::NumericVector x, Rcpp::NumericVector y, Rcp // [[Rcpp::export]] NumericVector probMatrixCI(NumericVector x, NumericVector y, NumericVector side, NumericVector ci){ - // Xtail generates a joint probability matrix by multiplying two probability density dsitributions. + // Xtail generates a joint probability matrix by multiplying two probability density dsitributions. // This return pvalue and CI double pmax = 0; - int pmaxi = 0; //max probability index + int pmaxi = 0; //max probability index double drecord = 0; double s = 0; // 2*s is final pvalue - double drecords = 0; + double drecords = 0; NumericVector ret(4); // return index of betas, pvalue,lowCI index, highCI index. int lowCI = -1; @@ -265,10 +265,10 @@ NumericVector probMatrixCI(NumericVector x, NumericVector y, NumericVector side, // [[Rcpp::export]] NumericVector boundFC(NumericVector k1, NumericVector alpha1, NumericVector intercept1, - NumericVector sf1, NumericVector FC1, NumericVector cond1, NumericVector k2, + NumericVector sf1, NumericVector FC1, NumericVector cond1, NumericVector k2, NumericVector alpha2, NumericVector intercept2, NumericVector sf2, NumericVector FC2, NumericVector cond2, NumericVector stepFC, NumericVector toleration){ - + NumericVector ret(2); //left side @@ -333,7 +333,7 @@ NumericVector xtail_test(NumericVector k1, NumericVector k2, NumericVector ints1 NumericVector ret(4); // deltaTE, pval, ci_low, ci_high - if (isnan(log2FC_1[0]) || isnan(log2FC_2[0])){ + if (std::isnan(static_cast(log2FC_1[0])) || std::isnan(static_cast(log2FC_2[0]))){ ret[1] = NA_REAL; ret[0] = NA_REAL; ret[2] = NA_REAL; @@ -358,7 +358,7 @@ NumericVector xtail_test(NumericVector k1, NumericVector k2, NumericVector ints1 NumericVector side(1); side[0] = log2FC_1[0] - log2FC_2[0]; - + if (ci[0] == 0){ NumericVector retValues = probMatrix(log2_density1, log2_density2, side); if(retValues[0] >= 0){ @@ -386,7 +386,7 @@ NumericVector xtail_test(NumericVector k1, NumericVector k2, NumericVector ints1 ret[1] = retValues[1]; //lowCI = retValues[2]; //highCI = retValues[3]; - // lowCI + // lowCI if (retValues[2]>=0){ highCI = betas[0] - betas[retValues[2]]; }else{ diff --git a/tests/runTests.R b/tests/runTests.R deleted file mode 100755 index 5962947..0000000 --- a/tests/runTests.R +++ /dev/null @@ -1 +0,0 @@ -BiocGenerics:::testPackage("xtail") \ No newline at end of file diff --git a/tests/testthat.R b/tests/testthat.R new file mode 100644 index 0000000..c6344e0 --- /dev/null +++ b/tests/testthat.R @@ -0,0 +1,4 @@ +library(testthat) +library(xtail) + +test_check("xtail") diff --git a/tests/testthat/test-xtail.R b/tests/testthat/test-xtail.R new file mode 100644 index 0000000..6da9018 --- /dev/null +++ b/tests/testthat/test-xtail.R @@ -0,0 +1,29 @@ +context("xtail") +test_that("xtail", { + data(xtaildata) + mrna <- xtaildata$mrna[1:100,] + rpf <- xtaildata$rpf[1:100,] + condition <- c("control","control","treat","treat") + xtail <- xtail(mrna,rpf,condition,bins=100, threads = 2) + expect_s4_class(xtail,"xtail") + # accessors + expect_s4_class(resultsTable(xtail),"DataFrame") + expect_type(conditions(xtail),"character") + # plots + plot <- plotFCs(xtail) + expect_s3_class(plot,"ggplot") + plot <- plotRs(xtail) + expect_s3_class(plot,"ggplot") + plot <- volcanoPlot(xtail) + expect_s3_class(plot,"ggplot") + + # SummarizedExperiment wrapper + se <- SummarizedExperiment(assays = list(mrna = mrna, rpf = rpf), + colData = DataFrame(condition = condition)) + xtail2 <- runXtail(se, "mrna","rpf",condition = colData(se)$condition, + bins=100, threads = 2) + expect_equal(xtail,xtail2) + se <- addXtail(se, "mrna","rpf",condition = colData(se)$condition, + bins=100, threads = 2) + expect_s4_class(se,"SummarizedExperiment") +}) diff --git a/vignettes/references.bib b/vignettes/references.bib new file mode 100644 index 0000000..7c2090e --- /dev/null +++ b/vignettes/references.bib @@ -0,0 +1,66 @@ +@Article{Love2014, +author={Love, Michael I. +and Huber, Wolfgang +and Anders, Simon}, +title={Moderated estimation of fold change and dispersion for RNA-seq data with DESeq2}, +journal={Genome Biology}, +year={2014}, +month={Dec}, +day={05}, +volume={15}, +number={12}, +pages={550}, +abstract={In comparative high-throughput sequencing assays, a fundamental task is the analysis of count data, such as read counts per gene in RNA-seq, for evidence of systematic changes across experimental conditions. Small replicate numbers, discreteness, large dynamic range and the presence of outliers require a suitable statistical approach. We present DESeq2, a method for differential analysis of count data, using shrinkage estimation for dispersions and fold changes to improve stability and interpretability of estimates. This enables a more quantitative analysis focused on the strength rather than the mere presence of differential expression. The DESeq2 package is available at http://www.bioconductor.org/packages/release/bioc/html/DESeq2.html.}, +issn={1474-760X}, +doi={10.1186/s13059-014-0550-8}, +url={https://doi.org/10.1186/s13059-014-0550-8} +} + +@article {Reddy026062, + author = {Reddy, Rahul}, + title = {A Comparison of Methods: Normalizing High-Throughput RNA Sequencing Data}, + elocation-id = {026062}, + year = {2015}, + doi = {10.1101/026062}, + publisher = {Cold Spring Harbor Laboratory}, + abstract = {As RNA-Seq and other high-throughput sequencing grow in use and remain critical for gene expression studies, technical variability in counts data impedes studies of differential expression studies, data across samples and experiments, or reproducing results. Studies like Dillies et al. (2013) compare several between-lane normalization methods involving scaling factors, while Hansen et al. (2012) and Risso et al. (2014) propose methods that correct for sample-specific bias or use sets of control genes to isolate and remove technical variability. This paper evaluates four normalization methods in terms of reducing intra-group, technical variability and facilitating differential expression analysis or other research where the biological, inter-group variability is of interest. To this end, the four methods were evaluated in differential expression analysis between data from Pickrell et al. (2010) and Montgomery et al. (2010) and between simulated data modeled on these two datasets. Though the between-lane scaling factor methods perform worse on real data sets, they are much stronger for simulated data. We cannot reject the recommendation of Dillies et al. to use TMM and DESeq normalization, but further study of power to detect effects of different size under each normalization method is merited.}, + URL = {https://www.biorxiv.org/content/early/2015/09/03/026062}, + eprint = {https://www.biorxiv.org/content/early/2015/09/03/026062.full.pdf}, + journal = {bioRxiv} +} + +@Article{Hsieh2012, +author={Hsieh, Andrew C. +and Liu, Yi +and Edlind, Merritt P. +and Ingolia, Nicholas T. +and Janes, Matthew R. +and Sher, Annie +and Shi, Evan Y. +and Stumpf, Craig R. +and Christensen, Carly +and Bonham, Michael J. +and Wang, Shunyou +and Ren, Pingda +and Martin, Michael +and Jessen, Katti +and Feldman, Morris E. +and Weissman, Jonathan S. +and Shokat, Kevan M. +and Rommel, Christian +and Ruggero, Davide}, +title={The translational landscape of mTOR signalling steers cancer initiation and metastasis}, +journal={Nature}, +year={2012}, +month={May}, +day={01}, +volume={485}, +number={7396}, +pages={55-61}, +abstract={The mammalian target of rapamycin (mTOR) kinase is a master regulator of protein synthesis that couples nutrient sensing to cell growth and cancer. However, the downstream translationally regulated nodes of gene expression that may direct cancer development are poorly characterized. Using ribosome profiling, we uncover specialized translation of the prostate cancer genome by oncogenic mTOR signalling, revealing a remarkably specific repertoire of genes involved in cell proliferation, metabolism and invasion. We extend these findings by functionally characterizing a class of translationally controlled pro-invasion messenger RNAs that we show direct prostate cancer invasion and metastasis downstream of oncogenic mTOR signalling. Furthermore, we develop a clinically relevant ATP site inhibitor of mTOR, INK128, which reprograms this gene expression signature with therapeutic benefit for prostate cancer metastasis, for which there is presently no cure. Together, these findings extend our understanding of how the `cancerous' translation machinery steers specific cancer cell behaviours, including metastasis, and may be therapeutically targeted.}, +issn={1476-4687}, +doi={10.1038/nature10912}, +url={https://doi.org/10.1038/nature10912} +} + + diff --git a/vignettes/xtail.Rmd b/vignettes/xtail.Rmd new file mode 100644 index 0000000..48cb201 --- /dev/null +++ b/vignettes/xtail.Rmd @@ -0,0 +1,159 @@ +--- +title: "Genome-wide assessment of differential translations with ribosome profiling data the xtail package" +author: +- name: Zhengtao Xiao + affiliation: + - MOE Key Laboratory of Bioinformatics + - Tsinghua-Peking Joint Center for Life Sciences + - School of Life Sciences, Tsinghua University, Beijing 100084, China +- name: Qin Zou + affiliation: + - MOE Key Laboratory of Bioinformatics + - Tsinghua-Peking Joint Center for Life Sciences + - School of Life Sciences, Tsinghua University, Beijing 100084, China +- name: Yu Liu + affiliation: + - MOE Key Laboratory of Bioinformatics + - Tsinghua-Peking Joint Center for Life Sciences + - School of Life Sciences, Tsinghua University, Beijing 100084, China +- name: Xuerui Yang + affiliation: + - MOE Key Laboratory of Bioinformatics + - Tsinghua-Peking Joint Center for Life Sciences + - School of Life Sciences, Tsinghua University, Beijing 100084, China +date: "`r Sys.Date()`" +package: xtail +output: + BiocStyle::html_document: + toc: true + toc_float: true + df_print: paged +vignette: > + %\VignetteIndexEntry{xtail} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +bibliography: references.bib +abstract: > + If you use xtail in published research, please cite: \n + Z. Xiao, Q. Zou, Y. Liu, X. Yang: **Genome-wide assessment of differential translations with ribosome profiling data**. + **Nat Commun** 2016, 7:11194. http://www.ncbi.nlm.nih.gov/pubmed/27041671 +--- + +# Introduction + +This package, Xtail, is for identification of genes undergoing differential translation across two conditions with ribosome profiling data. Xtail is based on a simple assumption that if a gene is subjected to translational dyresgulation under certain exprimental or physiological condition, the change of its RPF abundance should be discoordinated with that of mRNA expression. Specifically, `xtail` consists of three major steps: (1) modeling of ribosome profiling data using negative binomial distribution (NB), (2) estabilishment of probability distributions for fold changes of mRNA or RPF (or RPF-to-mRNA ratios), and (3) evaluation of statistical significance and magnitude of differential translations. The differential translation of each gene is evaluated by two pipelines: +in the first one, `xtail` calculated the posterior probabilities for a range of mRNA or RPF fold changes, and eventually estabilished their probability distributions. These two distributions, represented as probability vectors, were then used to estabilish a joint probability distribution matrix, from which a new probability distribution were generated for differential translation. The P-values, point estimates and credible intervals of differential tranlsations were then calculated based on these results. In the other parallel pipline, `xtail` established probability distributions for RPF-to-mRNA ratios in two conditions and derived another distribution for differential translation. The more conserved set of results from these two parallel piplines was used as the final result. With this strategy, `xtail` performs quantification of differential translation for each gene, i.e., the extent to which a gene`s translational rate is not coordinated with the change of the mRNA expression. + +By default, `Xtail` adapts the strategy of DESeq2 [[@Love2014]](#references) to normalize read counts of mRNA and RPF in all samples, and fits NB distributions with dispersions $\alpha$ and $\mu$. + +This guide provides step-by-step instructions on how to load data, how to execute the package and how to interpret output. + +# Data Preparation + +The `xtail` package uses read counts of RPF and mRNA, in the form of rectangular table of values. The rows and columns of the table represent the genes and samples, respectively. Each cell in the `g-th` row and the `i-th` columns is the count number of reads mapped to gene `g` in sample `i`. + +Xtail takes in raw read counts of RPF and mRNA, and performs median-of-ratios normalization by default. This normalization method is also recommend by Reddy R. [[@Reddy026062]](#references). Alternatively, users can provide normalized read counts and skip the built-in normalization in Xtail. + +In this vignette, we select a published ribosome profiling dataset from human prostate cancer cell PC3 after mTOR signaling inhibition with PP242 [[@Hsieh2012]](#references). This dataset consists of mRNA and RPF data for 11391 genes in two replicates from each of the two conditions(`treatment` vs. `control`). + +# An example + +Here we run `xtail` with the ribosome profiling data described above. First we load the library and data. + +```{r begin,results="hold",message=FALSE} +library(xtail) +data(xtaildata) +``` + +Next we can view the first five lines of the mRNA (`mrna`) and RPF (`rpf`) elements of `xtaildata`. + +```{r } +mrna <- xtaildata$mrna +rpf <- xtaildata$rpf +head(mrna,5) +head(rpf,5) +``` + +We assign condition labels to the columns of the mRNA and RPF data. + +```{r } +condition <- c("control","control","treat","treat") +``` + +Next, we run the main function, `xtail()`. By default, the second condition (here is `treat`) would be compared against the first condition (here is `control`). Those genes with the minimum average expression of mRNA counts and RPF counts among all samples larger than 1 are used (can be changed by setting `minMeanCount`). All the available CPU cores are used for running program. The argument `bins` is the number of bins used for calculating the probability densities of log2FC and log2R. This paramater will determine accuracy of the final pvalue. Here, in order to keep the run-time of this vignette short, we will set `bins` to `1000`. Detailed description of the arguments of the `xtail` function can be found by typing `?xtail` at the `R` prompt. + +```{r } +test.results <- xtail(mrna,rpf,condition,bins=1000) +``` + +Now we can extract a results table using the function `resultsTable}, and examine the first five lines of the results table. + +```{r inspectData,echo=TRUE} +test.tab <- resultsTable(test.results) +head(test.tab,5) +``` + +The results of fist pipline are named with suffix `\_v1`, which are generated by comparing mRNA and RPF log2 fold changes: The element `log2FC_TE_v1` represents the log2 fold change of TE; The `pvalue_v1` represent statistical significance. The sencond pipline are named with suffix `\_v2`, which are derived by comparing log2 ratios between two conditions: `log2FC_TE_v2`, and `pvalue_v2` are log2 ratio of TE, and pvalues. Finally, the more conserved results (with larger-Pvalue) was select as the final assessment of differential translation, which are named with suffix `\_final`. The `pvalue.adjust` is the estimated false discovery rate corresponding to the `pvalue_final`. + +Users can also get the log2 fold changes of mRNA and RPF, or the log2 ratios of two conditions by setting `log2FCs` or `log2Rs` as `TRUE` in resultsTable. And the results table can be sorted by assigning the `sort.by`. Detailed description of the `resultsTable` function can be found by typing `?resultsTable`. + +Finally, the plain-text file of the results can be exported using the functions `write.csv` or `write.table`. + +```{r writeResult,eval=FALSE} +write.table(test.tab,"test_results.txt",quote=FALSE,sep="\t") +``` + +We also provide a very simple function, `write.xtail` (using the write.table function), to export the `xtail` result (test.results) to a tab delimited file. + +```{r writextailResult,eval=FALSE} +write.xtail(test.results, file = "test_results.txt", quote = FALSE, sep = "\t") +``` + +# Visualization + +## plotFCs + +In `Xtail`, the function `plotFCs` shows the result of the differential expression at the two expression levels, where each gene is a dot whose position is determined by its log2 fold change (log2FC) of transcriptional level (`mRNA_log2FC`), represented on the x-axis, and the log2FC of translational level (`RPF_log2FC`), represented on the y-axis (Figure \@ref(fig:plotFCs)). The optional input parameter of `plotFCs` is `log2FC.cutoff`, a non-negative threshold value that will divide the genes into different classes: + +- `blue`: for genes whoes `mRNA_log2FC` larger than `log2FC.cutoff` (transcriptional level). +- `red`: for genes whoes `RPF_log2FC` larger than `log2FC.cutoff` (translational level). +- `green`: for genes changing homodirectionally at both level. +- `yellow`: for genes changing antidirectionally at two levels. + +```{r plotFCs, fig.cap="Scatter plot of log2 fold changes"} +plotFCs(test.results) +``` + +Those genes in which the difference of `mRNA_log2FC` and `RPF_log2FC` did not exceed more than `log2FC.cutoff` are excluded. The points will be color-coded with the `pvalue_final` obtained with `xtail` (more significant p values having darker color). By default the `log2FC.cutoff` is 1. + +## plotRs + +Similar to `plotFCs`, the function `plotRs` shows the RPF-to-mRNA ratios in two conditions, where the position of each gene is determined by its RPF-to-mRNA ratio (log2R) in two conditions, represented on the x-axis and y-axis respectively (Figure \@ref(fig:plotRs)). The optional input parameter `log2R.cutoff` (non-negative threshold value) will divide the genes into different classes: + +- `blue`: for genes whoes `log2R` larger in first condition than second condition. +- `red`: for genes whoes `log2R` larger in second condition than the first condition. +- `green`: for genes whoes `log2R` changing homodirectionally in two condition. +- `yellow`: for genes whoes `log2R` changing antidirectionally in two conditon. + +```{r plotRs, fig.cap="Scatter plot of log2 RPF-to-mRNA ratios"} +plotRs(test.results) +``` + +Those genes in which the difference of `log2R` in two conditions did not exceed more than `log2R.cutoff` are excluded. The points will be color-coded with the `pvalue_final` obtained with `xtail` (more significant p values having darker color). By default the `log2R.cutoff` is 1. + + +## volcanoPlot + +It can also be useful to evaluate the fold changes cutoff and p values thresholds by looking at the volcano plot. A simple function for making this plot is `volcanoPlot`, in which the `log2FC_TE_final` is plotted on the x-axis and the negative log10 `pvalue_final` is plotted on the y-axis (Figure \@ref(fig:volcanoPlot)). + +```{r volcanoPlot, fig.cap="volcano plot"} +volcanoPlot(test.results) +``` + +# Session info {.unnumbered} + +```{r} +sessionInfo() +``` + +# References {.unnumbered} diff --git a/vignettes/xtail.Rnw b/vignettes/xtail.Rnw deleted file mode 100755 index 2980936..0000000 --- a/vignettes/xtail.Rnw +++ /dev/null @@ -1,222 +0,0 @@ -%\VignetteIndexEntry{xtail} -%\VignetteDepends{xtail} -%\VignetteKeywords{xtail} -%\VignettePackage{xtail} -%\VignetteEngine{knitr::knitr} - -% To compile this document -% library('knitr');rm(list=ls());knit('xtail.Rnw') - -\documentclass[12pt]{article} - - -<>= -library("knitr") -opts_chunk$set( - tidy=FALSE, - fig.show="hide", - cache=TRUE, - message=FALSE) -@ - -\title{Genome-wide assessment of differential translations \\ with ribosome profiling data \\-- the xtail package} -\author{Zhengtao Xiao$^{1-3}$, Qin Zou$^{1,3}$, Yu Liu$^{1-3}$, and Xuerui Yang$^{1-3}$ -\\[1em] \small{$^{1}$MOE Key Laboratory of Bioinformatics, } -\\ \small{$^{2}$Tsinghua-Peking Joint Center for Life Sciences,} -\\ \small{$^{3}$School of Life Sciences, Tsinghua University, Beijing 100084, China.}} - -<<>= -BiocStyle::latex() -@ - -\begin{document} - -<>= -library(knitr) -opts_chunk$set( -concordance=TRUE -) -@ - -\maketitle -\vspace{15em} - -\begin{center} - \begin{tabular}{ | l | } - \hline - \textbf{xtail version:} \Sexpr{packageVersion("xtail")} \\ - \\ - If you use xtail in published research, please cite: \\ - Z. Xiao, Q. Zou, Y. Liu, X. Yang: \textbf{Genome-wide assessment} \\ - \textbf{of differential translations with ribosome profiling data}. \\ - \emph{Nat Commun} 2016, \textbf{7}:11194. \\ - \url{http://www.ncbi.nlm.nih.gov/pubmed/27041671} \\ - \hline - \end{tabular} -\end{center} - -\newpage -\section{Introduction} - -This package, Xtail, is for identification of genes undergoing differential translation across two conditions with ribosome profiling data. Xtail is based on a simple assumption that if a gene is subjected to translational dyresgulation under certain exprimental or physiological condition, the change of its RPF abundance should be discoordinated with that of mRNA expression. Specifically, \verb'Xtail' consists of three major steps: (1) modeling of ribosome profiling data using negative binomial distribution (NB), (2) estabilishment of probability distributions for fold changes of mRNA or RPF (or RPF-to-mRNA ratios), and (3) evaluation of statistical significance and magnitude of differential translations. The differential translation of each gene is evaluated by two pipelines: -in the first one, \verb'Xtail' calculated the posterior probabilities for a range of mRNA or RPF fold changes, and eventually estabilished their probability distributions. These two distributions, represented as probability vectors, were then used to estabilish a joint probability distribution matrix, from which a new probability distribution were generated for differential translation. The P-values, point estimates and credible intervals of differential tranlsations were then calculated based on these results. In the other parallel pipline, \verb'Xtail' established probability distributions for RPF-to-mRNA ratios in two conditions and derived another distribution for differential translation. The more conserved set of results from these two parallel piplines was used as the final result. With this strategy, \verb'Xtail' performs quantification of differential translation for each gene, i.e., the extent to which a gene's translational rate is not coordinated with the change of the mRNA expression. - -By default, \verb'Xtail' adapts the strategy of DESeq2 [1] to normalize read counts of mRNA and RPF in all samples, and fits NB distributions with dispersions $\alpha$ and $\mu$. - -This guide provides step-by-step instructions on how to load data, how to excute the package and how to interpret output. - -\section{Data Preparation} - -The \verb'Xtail' package uses read counts of RPF and mRNA, in the form of rectangular table of values. The rows and columns of the table represent the genes and samples, respectively. Each cell in the \textsl{g-th} row and the \textsl{i-th} columns is the count number of reads mapped to gene \textsl{g} in sample \textsl{i}. - -Xtail takes in raw read counts of RPF and mRNA, and performs median-of-ratios normalization by default. This normalization method is also recommend by Reddy R. [2]. Alternatively, users can provide normalized read counts and skip the built-in normalization in Xtail. - -In this vignette, we select a published ribosome profiling dataset from human prostate cancer cell PC3 after mTOR signaling inhibition with PP242 [3]. This dataset consists of mRNA and RPF data for 11391 genes in two replicates from each of the two conditions("treatment" vs. "control"). - -\section{An Example} - -Here we run \verb'Xtail' with the ribosome profiling data described above. First we load the library and data. - -<>= -library(xtail) -data(xtaildata) -@ - - -Next we can view the first five lines of the mRNA (\verb'mrna') and RPF (\verb'rpf') elements of \verb'xtaildata'. - -<<>>= -mrna <- xtaildata$mrna -rpf <- xtaildata$rpf -head(mrna,5) -head(rpf,5) -@ - - -We assign condition labels to the columns of the mRNA and RPF data. - -<<>>= -condition <- c("control","control","treat","treat") -@ - - -Next, we run the main function, \Rfunction{xtail()}. By default, the second condition (here is "treat") would be compared against the first condition (here is "control"). Those genes with the minimum average expression of mRNA counts and RPF counts among all samples larger than 1 are used (can be changed by setting \verb'minMeanCount'). All the available CPU cores are used for running program. The argument \verb`"bins"` is the number of bins used for calculating the probability densities of log2FC and log2R. This paramater will determine accuracy of the final pvalue. Here, in order to keep the run-time of this vignette short, we will set \verb'bins' to "1000". Detailed description of the arguments of the \texttt{xtail} function can be found by typing \texttt{?xtail} or \texttt{help(xtail)} at the \textbf{R} prompt. - -<<>>= -test.results <- xtail(mrna,rpf,condition,bins=1000) -@ - -We can summarize some basic information of xtail results using the summary function (type \texttt{?summary} for further information). - -<>= -summary(test.results) -@ - -Now we can extract a results table using the function \Rfunction{resultsTable}, and examine the first five lines of the results table. - -<>= -test.tab <- resultsTable(test.results) -head(test.tab,5) -@ - -The results of fist pipline are named with suffix "\_v1", which are generated by comparing mRNA and RPF log2 fold changes: The element \verb'log2FC_TE_v1' represents the log2 fold change of TE; The \verb"pvalue_v1" represent statistical significance. The sencond pipline are named with suffix "\_v2", which are derived by comparing log2 ratios between two conditions: \verb'log2FC_TE_v2', and \verb'pvalue_v2' are log2 ratio of TE, and pvalues. Finally, the more conserved results (with larger-Pvalue) was select as the final assessment of differential translation, which are named with suffix "\_final". The \verb'pvalue.adjust' is the estimated false discovery rate corresponding to the \verb'pvalue_final'. - -Users can also get the log2 fold changes of mRNA and RPF, or the log2 ratios of two conditions by setting "log2FCs" or "log2Rs" as "TRUE" in resultsTable. And the results table can be sorted by assigning the "sort.by". Detailed description of the \texttt{resultsTable} function can be found by typing \texttt{?resultsTable}. - -Finally, the plain-text file of the results can be exported using the functions \textsl{write.csv} or \textsl{write.table}. - -<>= -write.table(test.tab,"test_results.txt",quote=F,sep="\t") -@ - -We also provide a very simple function, \texttt{write.xtail} (using the write.table function), to export the \verb'xtail' result (test.results) to a tab delimited file. - -<>= -write.xtail(test.results,"test_results.txt",quote=F,sep="\t") -@ - - -\section{Visualization} - -\subsection{plotFCs} - -In \verb'Xtail', the function \Rfunction{plotFCs} shows the result of the differential expression at the two expression levels, where each gene is a dot whose position is determined by its log2 fold change (log2FC) of transcriptional level (\verb'mRNA_log2FC'), represented on the x-axis, and the log2FC of translational level (\verb'RPF_log2FC'), represented on the y-axis (Figure \ref{fig:plotFCs}). The optional input parameter of \Rfunction{plotFCs} is \verb'log2FC.cutoff', a non-negative threshold value that will divide the genes into different classes: - -\begin{itemize} - \item \verb'blue': for genes whoes \verb'mRNA_log2FC' larger than \verb'log2FC.cutoff' (transcriptional level). - \item \verb'red': for genes whoes \verb'RPF_log2FC' larger than \verb'log2FC.cutoff' (translational level). - \item \verb'green': for genes changing homodirectionally at both level. - \item \verb'yellow': for genes changing antidirectionally at two levels. -\end{itemize} - - -<>= -plotFCs(test.results) -@ -\begin{figure}[h] -\centering -\includegraphics[width=.5\textwidth]{figure/plotFCs-1} -\caption{Scatter plot of log2 fold changes} -\label{fig:plotFCs} -\end{figure} - -Those genes in which the difference of \verb'mRNA_log2FC' and \verb'RPF_log2FC' did not exceed more than \verb'log2FC.cutoff' are excluded. The points will be color-coded with the \verb'pvalue_final' obtained with \verb'xtail' (more significant p values having darker color). By default the \verb'log2FC.cutoff' is 1. - - -\subsection{plotRs} - -Similar to \Rfunction{plotFCs}, the function \Rfunction{plotRs} shows the RPF-to-mRNA ratios in two conditions, where the position of each gene is determined by its RPF-to-mRNA ratio (log2R) in two conditions, represented on the x-axis and y-axis respectively (Figure \ref{fig:plotRs}). The optional input parameter \verb'log2R.cutoff' (non-negative threshold value) will divide the genes into different classes: -\begin{itemize} - \item \verb'blue': for genes whoes \verb'log2R' larger in first condition than second condition. - \item \verb'red': for genes whoes \verb'log2R' larger in second condition than the first condition. - \item \verb'green': for genes whoes \verb'log2R' changing homodirectionally in two condition. - \item \verb'yellow': for genes whoes \verb'log2R' changing antidirectionally in two conditon. -\end{itemize} - -<>= -plotRs(test.results) -@ -\begin{figure}[h] -\centering -\includegraphics[width=.5\textwidth]{figure/plotRs-1} -\caption{Scatter plot of log2 RPF-to-mRNA ratios} -\label{fig:plotRs} -\end{figure} - -Those genes in which the difference of \verb'log2R' in two conditions did not exceed more than \verb'log2R.cutoff' are excluded. The points will be color-coded with the \verb'pvalue_final' obtained with \verb'xtail' (more significant p values having darker color). By default the \verb'log2R.cutoff' is 1. - - -\subsection{volcanoPlot} - -It can also be useful to evaluate the fold changes cutoff and p values thresholds by looking at the volcano plot. A simple function for making this plot is \Rfunction{volcanoPlot}, in which the \verb'log2FC_TE_final' is plotted on the x-axis and the negative log10 \verb'pvalue_fianl' is plotted on the y-axis (Figure \ref{fig:volcanoplot}). - -<>= -volcanoPlot(test.results) -@ -\begin{figure}[h] -\centering -\includegraphics[width=.5\textwidth]{figure/volcanoPlot-1} -\caption{volcano plot.} -\label{fig:volcanoplot} -\end{figure} - - - -\section*{Session Info} - -<>= -sessionInfo() -@ - -\begin{thebibliography}{99} -\bibitem{DESeq2} -Love MI, Huber W, Anders S: \textsl{Moderated Estimation of Fold Change and Dispersion for RNA-Seq Data with DESeq2}. Genome Biology 2014, 15:550. -A Comparison of Methods: Normalizing High-Throughput RNA Sequencing Data. -\bibitem{Reddy} -Reddy R: \textsl{A Comparison of Methods: Normalizing High-Throughput RNA Sequencing Data. Cold Spring Harbor Labs Journals}. bioRxiv 2015:1-9. -\bibitem{PC3} -Hsieh AC, Liu Y, Edlind MP, et al.: \textsl{The translational landscape of mTOR signaling steers cancer initiation and metastasis}. Nature 2012, 485:55-61. -\end{thebibliography} - -\end{document} - diff --git a/vignettes/xtail.pdf b/vignettes/xtail.pdf deleted file mode 100755 index de715f6..0000000 Binary files a/vignettes/xtail.pdf and /dev/null differ diff --git a/xtail.Rproj b/xtail.Rproj index 398aa14..4a25612 100755 --- a/xtail.Rproj +++ b/xtail.Rproj @@ -6,7 +6,7 @@ AlwaysSaveHistory: Default EnableCodeIndexing: Yes UseSpacesForTab: Yes -NumSpacesForTab: 2 +NumSpacesForTab: 4 Encoding: UTF-8 RnwWeave: knitr @@ -18,3 +18,5 @@ StripTrailingWhitespace: Yes BuildType: Package PackageUseDevtools: Yes PackageInstallArgs: --no-multiarch --with-keep.source +PackageCheckArgs: --no-multiarch +PackageRoxygenize: rd,collate,namespace