Computes strand-specific coverage signal, and returns a GRanges object. Function also works for non-strand-specific data.

getStrandedCoverage(
  dataset.gr,
  field = "score",
  ncores = getOption("mc.cores", 2L)
)

Arguments

dataset.gr

A GRanges object either containing ranges for each read, or one in which readcounts for individual ranges are contained in metadata (typically in the "score" field). dataset.gr can also be a list of such GRanges objects.

field

The name of the metadata field that contains readcounts. If no metadata field contains readcounts, and each range represents a single read, set to NULL.

ncores

Number of cores to use for calculating coverage. For a single dataset, the max that will be used is 3, one for each possible strand (plus, minus, and unstranded). More cores can be used if dataset.gr is a list.

Value

A GRanges object with signal in the "score" metadata column. Note that the output is not automatically converted into a

"basepair-resolution" GRanges object.

Author

Mike DeBerardine

Examples

#--------------------------------------------------#
# Using included full-read data
#--------------------------------------------------#
# -> whole-read coverage sacrifices meaningful readcount
#    information, but can be useful for visualization,
#    e.g. for looking at RNA-seq data in a genome browser

data("PROseq_paired")

PROseq_paired[1:6]
#> GRanges object with 6 ranges and 1 metadata column:
#>       seqnames      ranges strand |     score
#>          <Rle>   <IRanges>  <Rle> | <integer>
#>   [1]     chr4   1270-1295      + |         1
#>   [2]     chr4 41411-41428      + |         1
#>   [3]     chr4 42556-42590      + |         1
#>   [4]     chr4 42559-42588      + |         1
#>   [5]     chr4 42559-42593      + |         3
#>   [6]     chr4 42560-42593      + |         2
#>   -------
#>   seqinfo: 7 sequences from an unspecified genome

getStrandedCoverage(PROseq_paired, ncores = 1)[1:6]
#> GRanges object with 6 ranges and 1 metadata column:
#>       seqnames      ranges strand |     score
#>          <Rle>   <IRanges>  <Rle> | <integer>
#>   [1]     chr4   1270-1295      + |         1
#>   [2]     chr4 41411-41428      + |         1
#>   [3]     chr4 42556-42558      + |         1
#>   [4]     chr4       42559      + |         5
#>   [5]     chr4       42560      + |         9
#>   [6]     chr4 42561-42562      + |        10
#>   -------
#>   seqinfo: 7 sequences from an unspecified genome

#--------------------------------------------------#
# Getting coverage from single bases of single reads
#--------------------------------------------------#

# included PROseq data is already single-base coverage
data("PROseq")
range(width(PROseq))
#> [1] 1 1

# undo coverage for the first 100 positions
ps <- PROseq[1:100]
ps_reads <- rep(ps, times = ps$score)
mcols(ps_reads) <- NULL

ps_reads[1:6]
#> GRanges object with 6 ranges and 0 metadata columns:
#>       seqnames    ranges strand
#>          <Rle> <IRanges>  <Rle>
#>   [1]     chr4      1295      +
#>   [2]     chr4     41428      +
#>   [3]     chr4     42588      +
#>   [4]     chr4     42590      +
#>   [5]     chr4     42590      +
#>   [6]     chr4     42593      +
#>   -------
#>   seqinfo: 7 sequences from an unspecified genome

# re-create coverage
getStrandedCoverage(ps_reads, field = NULL, ncores = 1)[1:6]
#> GRanges object with 6 ranges and 1 metadata column:
#>       seqnames    ranges strand |     score
#>          <Rle> <IRanges>  <Rle> | <integer>
#>   [1]     chr4      1295      + |         1
#>   [2]     chr4     41428      + |         1
#>   [3]     chr4     42588      + |         1
#>   [4]     chr4     42590      + |         2
#>   [5]     chr4     42593      + |         5
#>   [6]     chr4     42594      + |         2
#>   -------
#>   seqinfo: 7 sequences from an unspecified genome

#--------------------------------------------------#
# Reversing makeGRangesBRG
#--------------------------------------------------#
# -> getStrandedCoverage doesn't return single-width
#    GRanges, which is useful because getting coverage
#    will merge adjacent bases with equivalent scores

# included PROseq data is already single-width
range(width(PROseq))
#> [1] 1 1
isDisjoint(PROseq)
#> [1] TRUE

ps_cov <- getStrandedCoverage(PROseq, ncores = 1)

range(width(ps_cov))
#> [1] 1 6
sum(score(PROseq)) == sum(score(ps_cov) * width(ps_cov))
#> [1] TRUE

# -> Look specifically at ranges that could be combined
neighbors <- c(shift(PROseq, 1), shift(PROseq, -1))
hits <- findOverlaps(PROseq, neighbors)
idx <- unique(from(hits)) # indices for PROseq with neighbor

PROseq[idx]
#> GRanges object with 13699 ranges and 1 metadata column:
#>           seqnames    ranges strand |     score
#>              <Rle> <IRanges>  <Rle> | <integer>
#>       [1]     chr4     42593      + |         5
#>       [2]     chr4     42594      + |         2
#>       [3]     chr4     42595      + |         1
#>       [4]     chr4     42596      + |         1
#>       [5]     chr4     42618      + |         1
#>       ...      ...       ...    ... .       ...
#>   [13695]     chr4   1306889      - |         1
#>   [13696]     chr4   1307114      - |         1
#>   [13697]     chr4   1307115      - |         2
#>   [13698]     chr4   1307300      - |         1
#>   [13699]     chr4   1307301      - |         1
#>   -------
#>   seqinfo: 7 sequences from an unspecified genome

getStrandedCoverage(PROseq[idx], ncores = 1)
#> GRanges object with 9704 ranges and 1 metadata column:
#>          seqnames          ranges strand |     score
#>             <Rle>       <IRanges>  <Rle> | <integer>
#>      [1]     chr4           42593      + |         5
#>      [2]     chr4           42594      + |         2
#>      [3]     chr4     42595-42596      + |         1
#>      [4]     chr4           42618      + |         1
#>      [5]     chr4           42619      + |         2
#>      ...      ...             ...    ... .       ...
#>   [9700]     chr4 1305972-1305973      - |         1
#>   [9701]     chr4 1306888-1306889      - |         1
#>   [9702]     chr4         1307114      - |         1
#>   [9703]     chr4         1307115      - |         2
#>   [9704]     chr4 1307300-1307301      - |         1
#>   -------
#>   seqinfo: 7 sequences from an unspecified genome