@lakesea
2020-11-28T06:57:18.000000Z
字数 2196
阅读 587
Methods
Align the trimmed paired-end reads to the reference genome:
hisat2 -x SA_trinity_merged.fa -1 sample1_R1.fq.gz -2 sample1_R2.fq.gz -p 12 | samtools view -@ 3 -Sb - | samtools sort -@ 3 -o sample1.sort.bam
Install the R packages used for counting the reads:
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
BiocManager::install(c("Rsubread", "limma", "edgeR"))
install.packages("argparser")
Use featureCounts (from the Rsubread package) to count the reads:
Rscript run-featurecounts.R -b sample1.sort.bam -g annotation.gtf -o sample1_RNA
Contents of the run-featurecounts.R script:
#!/usr/bin/env Rscript
# Run featureCounts on one paired-end BAM file and write, per gene, the raw
# read count together with FPKM and TPM values.
#
# Usage:
#   Rscript run-featurecounts.R -b sample.bam -g annotation.gtf -o prefix
#
# Outputs:
#   <prefix>.log    featureCounts assignment statistics (tab-separated,
#                   no header)
#   <prefix>.count  columns gene_id, counts, fpkm, tpm (tab-separated,
#                   with header)

# parse parameter ---------------------------------------------------------
library(argparser, quietly = TRUE)

# Create a parser
p <- arg_parser("run featureCounts and calculate FPKM/TPM")

# Add command line arguments
p <- add_argument(p, "--bam", help = "input: bam file", type = "character")
p <- add_argument(p, "--gtf", help = "input: gtf file", type = "character")
p <- add_argument(p, "--output", help = "output prefix", type = "character")
p <- add_argument(p, "--threads",
                  help = "number of threads used by featureCounts",
                  default = 1)

# Parse the command line arguments
argv <- parse_args(p)

# validate parameter ------------------------------------------------------
# Options that were not supplied on the command line have no usable value;
# fail early with a readable message instead of an obscure error later on.
is_unset <- function(value) {
  is.null(value) || length(value) != 1L || is.na(value)
}

required_args <- c("bam", "gtf", "output")
unset_args <- required_args[
  vapply(required_args, function(nm) is_unset(argv[[nm]]), logical(1))
]
if (length(unset_args) > 0) {
  stop(
    "missing required argument(s): ",
    paste0("--", unset_args, collapse = ", "),
    call. = FALSE
  )
}

bam_file <- argv[["bam"]]
gtf_file <- argv[["gtf"]]
out_prefix <- argv[["output"]]
n_threads <- suppressWarnings(as.integer(argv[["threads"]]))

if (is_unset(n_threads) || n_threads < 1L) {
  stop("--threads must be a positive integer", call. = FALSE)
}
for (input_path in c(bam_file, gtf_file)) {
  if (!file.exists(input_path)) {
    stop("input file not found: ", input_path, call. = FALSE)
  }
}

# count reads -------------------------------------------------------------
library(Rsubread)
library(limma)
library(edgeR)

stats_path <- paste0(out_prefix, ".log")
counts_path <- paste0(out_prefix, ".count")

fc <- featureCounts(
  bam_file,
  annot.ext = gtf_file,
  isGTFAnnotationFile = TRUE,
  nthreads = n_threads,
  isPairedEnd = TRUE
)

# normalise ---------------------------------------------------------------
dge <- DGEList(counts = fc$counts, genes = fc$annotation)
fpkm <- rpkm(dge, gene.length = dge$genes$Length)
# TPM is each gene's FPKM as a share of the FPKM total, scaled to one million;
# the calculation is carried out in log space.
tpm <- exp(log(fpkm) - log(sum(fpkm)) + log(1e6))

# write results -----------------------------------------------------------
write.table(
  fc$stat, stats_path,
  sep = "\t", col.names = FALSE, row.names = FALSE, quote = FALSE
)

# A single BAM file is processed, so each matrix has exactly one column.
# The table is deliberately not called `featureCounts`, which would shadow
# the Rsubread function of the same name.
count_table <- data.frame(
  gene_id = fc$annotation[, 1],
  counts = fc$counts[, 1],
  fpkm = fpkm[, 1],
  tpm = tpm[, 1],
  stringsAsFactors = FALSE
)
write.table(
  count_table, counts_path,
  sep = "\t", col.names = TRUE, row.names = FALSE, quote = FALSE
)
After counting the reads for each RNA sample, we merged the per-sample RNA-seq expression tables from all samples into a single matrix:
ls *.count > genes.quant_files.txt
perl abundance_estimates_to_matrix.pl --est_method featureCounts --quant_files genes.quant_files.txt --out_prefix genes
Use the matrix to analyse RNA-seq expression under different conditions:
Here we used one of the scripts bundled with Trinity:
conda install trinity
```
perl run_DE_analysis.pl --matrix genes.counts.matrix --method DESeq2 --samples_file samples.txt --contrasts contrasts.txt
```