From d74c2dc043b0d3cf97a7cfc1b8eee0eb9a59c8e7 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 25 Oct 2025 15:56:08 +0000 Subject: [PATCH 1/3] Initial plan From 5dd85a8a4d0afee72fe5d38aa21e71df9b2aa501 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 25 Oct 2025 16:00:13 +0000 Subject: [PATCH 2/3] Add pkgdown configuration and GitHub Actions workflow Co-authored-by: fadikar <22967396+fadikar@users.noreply.github.com> --- .github/workflows/pkgdown.yaml | 48 ++++++++++++++ DESCRIPTION | 4 ++ _pkgdown.yml | 67 +++++++++++++++++++ vignettes/robseqexample.Rmd | 118 +++++++++++++++++++++++++++++++++ 4 files changed, 237 insertions(+) create mode 100644 .github/workflows/pkgdown.yaml create mode 100644 _pkgdown.yml create mode 100644 vignettes/robseqexample.Rmd diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml new file mode 100644 index 0000000..ed7650c --- /dev/null +++ b/.github/workflows/pkgdown.yaml @@ -0,0 +1,48 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + branches: [main, master] + pull_request: + branches: [main, master] + release: + types: [published] + workflow_dispatch: + +name: pkgdown + +jobs: + pkgdown: + runs-on: ubuntu-latest + # Only restrict concurrency for non-PR jobs + concurrency: + group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }} + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + permissions: + contents: write + steps: + - uses: actions/checkout@v3 + + - uses: r-lib/actions/setup-pandoc@v2 + + - uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::pkgdown, local::. 
+ needs: website + + - name: Build site + run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) + shell: Rscript {0} + + - name: Deploy to GitHub pages 🚀 + if: github.event_name != 'pull_request' + uses: JamesIves/github-pages-deploy-action@v4.4.1 + with: + clean: false + branch: gh-pages + folder: docs diff --git a/DESCRIPTION b/DESCRIPTION index c40b1f2..96065f3 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -18,3 +18,7 @@ License: `use_mit_license()`, `use_gpl3_license()` or friends to pick a Encoding: UTF-8 Roxygen: list(markdown = TRUE) RoxygenNote: 7.2.3 +Suggests: + knitr, + rmarkdown +VignetteBuilder: knitr diff --git a/_pkgdown.yml b/_pkgdown.yml new file mode 100644 index 0000000..b576436 --- /dev/null +++ b/_pkgdown.yml @@ -0,0 +1,67 @@ +url: https://schatterjee30.github.io/Robseq/ + +template: + bootstrap: 5 + bootswatch: cosmo + bslib: + primary: "#0054AD" + border-radius: 0.5rem + btn-border-radius: 0.25rem + +home: + title: "Robseq: Robust Differential Gene Expression Analysis" + description: > + A Robust Statistical Model for Differential Gene Expression Analysis in RNA-Seq Studies + +navbar: + structure: + left: [intro, reference, articles, news] + right: [search, github] + components: + home: + icon: fas fa-home fa-lg + href: index.html + reference: + text: Reference + href: reference/index.html + articles: + text: Articles + menu: + - text: Getting Started + href: articles/robseqexample.html + news: + text: News + href: news/index.html + github: + icon: fab fa-github fa-lg + href: https://github.com/schatterjee30/Robseq/ + +reference: + - title: Main Functions + desc: Core functions for differential gene expression analysis + contents: + - robust.dge + - Robseq + +figures: + dev: grDevices::png + dpi: 96 + dev.args: [] + fig.ext: png + fig.width: 7.2916667 + fig.height: ~ + fig.retina: 2 + fig.asp: 1.618 + bg: NA + +authors: + Suvo Chatterjee: + href: https://github.com/schatterjee30 + Arindam Fadikar: + href: 
mailto:afadikar@anl.gov + Vrushab Hanumesh: + href: mailto:vvrushab@iu.edu + Siddhant Meshram: + href: mailto:sidmeshr@iu.edu + Himel Mallick: + href: mailto:him4004@med.cornell.edu diff --git a/vignettes/robseqexample.Rmd b/vignettes/robseqexample.Rmd new file mode 100644 index 0000000..f8ffb38 --- /dev/null +++ b/vignettes/robseqexample.Rmd @@ -0,0 +1,118 @@ +--- +title: "Getting Started with Robseq" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Getting Started with Robseq} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r setup, include=FALSE} +knitr::opts_chunk$set(echo = TRUE) +``` + +# Importing libraries +```{r eval=FALSE} +library(Robseq) +library(edgeR) +library(doParallel) +library(EnhancedVolcano) +``` + +# Loading Example Data +Loading Colon cancer data +```{r eval=FALSE} +load("~/Current Data Path/Colon Cancer.RData") +``` + +Extracting Colon cancer gene expression data and metadata +```{r eval=FALSE} +features = data$counts +metadata = data$metadata +``` + +# Snapshot of data +A typical bulk RNA-seq gene expression count data frame looks something like below. Note, the genes should be in the rows and the samples in columns +```{r eval=FALSE} +features[1:5, 1:5] +``` + +``` +## GSM4731674 GSM4731675 GSM4731676 GSM4731677 GSM4731678 +## TSPAN6 103 44 76 417 630 +## TNMD 1 0 0 0 18 +## DPM1 86 63 97 173 309 +## SCYL3 32 40 77 56 97 +## C1orf112 28 35 25 60 55 +``` + +A typical metadata data frame looks something like below. Note, in our pipeline the user must include a column labeled as "Exposure" which should be the variable which will be used by Robseq in performing differential expression analysis. 
If the user has a different label for the treatment/condition or disease status variable then the user should supply that name via the "expVar" argument in Robseq +```{r eval=FALSE} +metadata[1:5, ] +``` + +``` +## Sample Exposure +## 1 GSM4731674 Tumor +## 2 GSM4731675 Tumor +## 3 GSM4731676 Tumor +## 4 GSM4731677 Tumor +## 5 GSM4731678 Tumor +``` + +# Preprocessing +A typical preprocessing step is to filter lowly abundant genes. We do so using edgeR's "filterByExpr" function +```{r eval=FALSE} +keep.exprs <- filterByExpr(features, group = as.factor(metadata$Exposure)) +paste(length(which(!keep.exprs)), ' lowly expressed genes were filtered out', sep = '') +features <- features[keep.exprs, ] +``` + +``` +## [1] "18625 lowly expressed genes were filtered out" +``` + +# Performing differential expression analysis using Robseq +To perform differential gene expression (DGE) analyses using Robseq please use the code below. Note, if your metadata has only the "Exposure" variable (such as treatment groups, conditions, disease status, etc.), set the "coVars" argument to "NULL". However, in this working example our metadata contained three covariates such as "post-mortem interval (pmi)", "rna integrity number (rin)" and "age at death" which needed to be adjusted for in our DGE analysis. 
Therefore, we have supplied these three variables to the "coVars" argument +```{r eval=FALSE} +fit <- Robseq::robust.dge(features = features, + metadata = metadata, + norm.method = "RLE", + expVar = "Exposure", + coVars = NULL, + filter = FALSE, + parallel = TRUE, + ncores = detectCores() - 2, + verbose = FALSE) +``` + +# Obtaining results from Robseq +After performing DGE analysis you can extract the results table in the following manner +```{r eval=FALSE} +results <- fit$res +``` + +The results table from Robseq should look something like the following +```{r eval=FALSE} +results[1:5,] +``` + +``` +## Genes log2FC SE L.CI U.CI Pval adjPval +## 6872 BEST4 -6.631 0.255 -6.131209 -7.130791 1.565312e-54 1.617267e-50 +## 10982 ETV4 5.184 0.202 5.579913 4.788087 2.121424e-54 1.617267e-50 +## 11710 OTOP3 -6.227 0.244 -5.748769 -6.705231 1.430501e-53 7.270284e-50 +## 11725 OTOP2 -7.246 0.298 -6.661931 -7.830069 2.196497e-51 8.372498e-48 +## 14866 SPIB -5.385 0.239 -4.916569 -5.853431 7.877754e-48 2.402242e-44 +``` + +# Volcano plot to visualize DGE results +Volcano plot to visualize the DGE results obtained from Robseq +```{r eval=FALSE} + EnhancedVolcano(results, + lab = results$Genes, + x = 'log2FC', + y = 'adjPval') +``` + +![Volcano Plot](../man/volcano.png) From 9a4b073cd64ee7d9c30d3293f93b5739824f9dab Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 25 Oct 2025 16:01:26 +0000 Subject: [PATCH 3/3] Add NEWS.md and refine pkgdown configuration Co-authored-by: fadikar <22967396+fadikar@users.noreply.github.com> --- NEWS.md | 7 +++++++ _pkgdown.yml | 5 ++++- 2 files changed, 11 insertions(+), 1 deletion(-) create mode 100644 NEWS.md diff --git a/NEWS.md b/NEWS.md new file mode 100644 index 0000000..32c9ee2 --- /dev/null +++ b/NEWS.md @@ -0,0 +1,7 @@ +# Robseq 0.0.0.9000 + +* Initial development version +* Core functionality for robust differential gene expression analysis in RNA-Seq studies +* Support for 
multiple normalization methods (TMM, RLE, CPM, Upper quartile, Quantile) +* Parallel processing support for improved performance +* Created pkgdown website for documentation diff --git a/_pkgdown.yml b/_pkgdown.yml index b576436..ef8d7be 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -12,8 +12,12 @@ home: title: "Robseq: Robust Differential Gene Expression Analysis" description: > A Robust Statistical Model for Differential Gene Expression Analysis in RNA-Seq Studies + links: + - text: Browse source code + href: https://github.com/schatterjee30/Robseq navbar: + logo: man/figures/RobseqLogo.png structure: left: [intro, reference, articles, news] right: [search, github] @@ -41,7 +45,6 @@ reference: desc: Core functions for differential gene expression analysis contents: - robust.dge - - Robseq figures: dev: grDevices::png