Computes strand-specific coverage signal, and returns a GRanges object. Function also works for non-strand-specific data.
getStrandedCoverage(
dataset.gr,
field = "score",
ncores = getOption("mc.cores", 2L)
)
A GRanges object either containing ranges for each read, or
one in which readcounts for individual ranges are contained in metadata
(typically in the "score" field). dataset.gr
can also be a list of
such GRanges objects.
The name of the metadata field that contains readcounts. If no metadata field contains readcounts, and each range represents a single read, set to NULL.
Number of cores to use for calculating coverage. For a single
dataset, the max that will be used is 3, one for each possible strand
(plus, minus, and unstranded). More cores can be used if dataset.gr
is a list.
A GRanges object with signal in the "score" metadata column. Note that the output is not automatically converted into a
"basepair-resolution"
GRanges
object.
#--------------------------------------------------#
# Using included full-read data
#--------------------------------------------------#
# -> whole-read coverage sacrifices meaningful readcount
# information, but can be useful for visualization,
# e.g. for looking at RNA-seq data in a genome browser
data("PROseq_paired")
PROseq_paired[1:6]
#> GRanges object with 6 ranges and 1 metadata column:
#> seqnames ranges strand | score
#> <Rle> <IRanges> <Rle> | <integer>
#> [1] chr4 1270-1295 + | 1
#> [2] chr4 41411-41428 + | 1
#> [3] chr4 42556-42590 + | 1
#> [4] chr4 42559-42588 + | 1
#> [5] chr4 42559-42593 + | 3
#> [6] chr4 42560-42593 + | 2
#> -------
#> seqinfo: 7 sequences from an unspecified genome
getStrandedCoverage(PROseq_paired, ncores = 1)[1:6]
#> GRanges object with 6 ranges and 1 metadata column:
#> seqnames ranges strand | score
#> <Rle> <IRanges> <Rle> | <integer>
#> [1] chr4 1270-1295 + | 1
#> [2] chr4 41411-41428 + | 1
#> [3] chr4 42556-42558 + | 1
#> [4] chr4 42559 + | 5
#> [5] chr4 42560 + | 9
#> [6] chr4 42561-42562 + | 10
#> -------
#> seqinfo: 7 sequences from an unspecified genome
#--------------------------------------------------#
# Getting coverage from single bases of single reads
#--------------------------------------------------#
# included PROseq data is already single-base coverage
data("PROseq")
range(width(PROseq))
#> [1] 1 1
# undo coverage for the first 100 positions
ps <- PROseq[1:100]
ps_reads <- rep(ps, times = ps$score)
mcols(ps_reads) <- NULL
ps_reads[1:6]
#> GRanges object with 6 ranges and 0 metadata columns:
#> seqnames ranges strand
#> <Rle> <IRanges> <Rle>
#> [1] chr4 1295 +
#> [2] chr4 41428 +
#> [3] chr4 42588 +
#> [4] chr4 42590 +
#> [5] chr4 42590 +
#> [6] chr4 42593 +
#> -------
#> seqinfo: 7 sequences from an unspecified genome
# re-create coverage
getStrandedCoverage(ps_reads, field = NULL, ncores = 1)[1:6]
#> GRanges object with 6 ranges and 1 metadata column:
#> seqnames ranges strand | score
#> <Rle> <IRanges> <Rle> | <integer>
#> [1] chr4 1295 + | 1
#> [2] chr4 41428 + | 1
#> [3] chr4 42588 + | 1
#> [4] chr4 42590 + | 2
#> [5] chr4 42593 + | 5
#> [6] chr4 42594 + | 2
#> -------
#> seqinfo: 7 sequences from an unspecified genome
#--------------------------------------------------#
# Reversing makeGRangesBRG
#--------------------------------------------------#
# -> getStrandedCoverage doesn't return single-width
# GRanges, which is useful because getting coverage
# will merge adjacent bases with equivalent scores
# included PROseq data is already single-width
range(width(PROseq))
#> [1] 1 1
isDisjoint(PROseq)
#> [1] TRUE
ps_cov <- getStrandedCoverage(PROseq, ncores = 1)
range(width(ps_cov))
#> [1] 1 6
sum(score(PROseq)) == sum(score(ps_cov) * width(ps_cov))
#> [1] TRUE
# -> Look specifically at ranges that could be combined
neighbors <- c(shift(PROseq, 1), shift(PROseq, -1))
hits <- findOverlaps(PROseq, neighbors)
idx <- unique(from(hits)) # indices for PROseq with neighbor
PROseq[idx]
#> GRanges object with 13699 ranges and 1 metadata column:
#> seqnames ranges strand | score
#> <Rle> <IRanges> <Rle> | <integer>
#> [1] chr4 42593 + | 5
#> [2] chr4 42594 + | 2
#> [3] chr4 42595 + | 1
#> [4] chr4 42596 + | 1
#> [5] chr4 42618 + | 1
#> ... ... ... ... . ...
#> [13695] chr4 1306889 - | 1
#> [13696] chr4 1307114 - | 1
#> [13697] chr4 1307115 - | 2
#> [13698] chr4 1307300 - | 1
#> [13699] chr4 1307301 - | 1
#> -------
#> seqinfo: 7 sequences from an unspecified genome
getStrandedCoverage(PROseq[idx], ncores = 1)
#> GRanges object with 9704 ranges and 1 metadata column:
#> seqnames ranges strand | score
#> <Rle> <IRanges> <Rle> | <integer>
#> [1] chr4 42593 + | 5
#> [2] chr4 42594 + | 2
#> [3] chr4 42595-42596 + | 1
#> [4] chr4 42618 + | 1
#> [5] chr4 42619 + | 2
#> ... ... ... ... . ...
#> [9700] chr4 1305972-1305973 - | 1
#> [9701] chr4 1306888-1306889 - | 1
#> [9702] chr4 1307114 - | 1
#> [9703] chr4 1307115 - | 2
#> [9704] chr4 1307300-1307301 - | 1
#> -------
#> seqinfo: 7 sequences from an unspecified genome