Filtering and counting spike-in reads
getSpikeInCounts(
dataset.gr,
si_pattern = NULL,
si_names = NULL,
field = "score",
sample_names = NULL,
expand_ranges = FALSE,
ncores = getOption("mc.cores", 2L)
)
removeSpikeInReads(
dataset.gr,
si_pattern = NULL,
si_names = NULL,
field = "score",
ncores = getOption("mc.cores", 2L)
)
getSpikeInReads(
dataset.gr,
si_pattern = NULL,
si_names = NULL,
field = "score",
ncores = getOption("mc.cores", 2L)
)
A GRanges object or a list of GRanges objects.
A regular expression that matches spike-in chromosomes. Can
be used in addition to, or as an alternative to si_names
.
A character vector giving the names of the spike-in
chromosomes. Can be used in addition to, or as an alternative to
si_pattern
.
The metadata field in dataset.gr
that contains
readcounts. If each range is an individual read, set field = NULL
.
An optional character vector used to rename the datasets
in dataset.gr
Logical indicating if ranges in dataset.gr
should
be treated as descriptions of single molecules (FALSE
), or if ranges
should be treated as representing multiple adjacent positions with the same
signal (TRUE
). See
getCountsByRegions
.
The number of cores to use for computations.
A dataframe containing total readcounts, experimental (non-spike-in) readcounts, and spike-in readcounts.
#--------------------------------------------------#
# Make list of dummy GRanges
#--------------------------------------------------#
gr1_rep1 <- GRanges(seqnames = c("chr1", "chr2", "spikechr1", "spikechr2"),
ranges = IRanges(start = 1:4, width = 1),
strand = "+")
gr2_rep2 <- gr2_rep1 <- gr1_rep2 <- gr1_rep1
# set readcounts
score(gr1_rep1) <- c(1, 1, 1, 1) # 2 exp + 2 spike = 4 total
score(gr2_rep1) <- c(2, 2, 1, 1) # 4 exp + 2 spike = 6 total
score(gr1_rep2) <- c(1, 1, 2, 1) # 2 exp + 3 spike = 5 total
score(gr2_rep2) <- c(4, 4, 2, 2) # 8 exp + 4 spike = 12 total
grl <- list(gr1_rep1, gr2_rep1,
gr1_rep2, gr2_rep2)
names(grl) <- c("gr1_rep1", "gr2_rep1",
"gr1_rep2", "gr2_rep2")
grl
#> $gr1_rep1
#> GRanges object with 4 ranges and 1 metadata column:
#> seqnames ranges strand | score
#> <Rle> <IRanges> <Rle> | <numeric>
#> [1] chr1 1 + | 1
#> [2] chr2 2 + | 1
#> [3] spikechr1 3 + | 1
#> [4] spikechr2 4 + | 1
#> -------
#> seqinfo: 4 sequences from an unspecified genome; no seqlengths
#>
#> $gr2_rep1
#> GRanges object with 4 ranges and 1 metadata column:
#> seqnames ranges strand | score
#> <Rle> <IRanges> <Rle> | <numeric>
#> [1] chr1 1 + | 2
#> [2] chr2 2 + | 2
#> [3] spikechr1 3 + | 1
#> [4] spikechr2 4 + | 1
#> -------
#> seqinfo: 4 sequences from an unspecified genome; no seqlengths
#>
#> $gr1_rep2
#> GRanges object with 4 ranges and 1 metadata column:
#> seqnames ranges strand | score
#> <Rle> <IRanges> <Rle> | <numeric>
#> [1] chr1 1 + | 1
#> [2] chr2 2 + | 1
#> [3] spikechr1 3 + | 2
#> [4] spikechr2 4 + | 1
#> -------
#> seqinfo: 4 sequences from an unspecified genome; no seqlengths
#>
#> $gr2_rep2
#> GRanges object with 4 ranges and 1 metadata column:
#> seqnames ranges strand | score
#> <Rle> <IRanges> <Rle> | <numeric>
#> [1] chr1 1 + | 4
#> [2] chr2 2 + | 4
#> [3] spikechr1 3 + | 2
#> [4] spikechr2 4 + | 2
#> -------
#> seqinfo: 4 sequences from an unspecified genome; no seqlengths
#>
#--------------------------------------------------#
# Count spike-in reads
#--------------------------------------------------#
# by giving names of all spike-in chromosomes
getSpikeInCounts(grl, si_names = c("spikechr1", "spikechr2"), ncores = 1)
#> sample total_reads exp_reads spike_reads
#> 1 gr1_rep1 4 2 2
#> 2 gr2_rep1 6 4 2
#> 3 gr1_rep2 5 2 3
#> 4 gr2_rep2 12 8 4
# or by matching the string/regular expression "spike" in chromosome names
getSpikeInCounts(grl, si_pattern = "spike", ncores = 1)
#> sample total_reads exp_reads spike_reads
#> 1 gr1_rep1 4 2 2
#> 2 gr2_rep1 6 4 2
#> 3 gr1_rep2 5 2 3
#> 4 gr2_rep2 12 8 4
#--------------------------------------------------#
# Filter out spike-in reads
#--------------------------------------------------#
removeSpikeInReads(grl, si_pattern = "spike", ncores = 1)
#> $gr1_rep1
#> GRanges object with 2 ranges and 1 metadata column:
#> seqnames ranges strand | score
#> <Rle> <IRanges> <Rle> | <numeric>
#> [1] chr1 1 + | 1
#> [2] chr2 2 + | 1
#> -------
#> seqinfo: 2 sequences from an unspecified genome; no seqlengths
#>
#> $gr2_rep1
#> GRanges object with 2 ranges and 1 metadata column:
#> seqnames ranges strand | score
#> <Rle> <IRanges> <Rle> | <numeric>
#> [1] chr1 1 + | 2
#> [2] chr2 2 + | 2
#> -------
#> seqinfo: 2 sequences from an unspecified genome; no seqlengths
#>
#> $gr1_rep2
#> GRanges object with 2 ranges and 1 metadata column:
#> seqnames ranges strand | score
#> <Rle> <IRanges> <Rle> | <numeric>
#> [1] chr1 1 + | 1
#> [2] chr2 2 + | 1
#> -------
#> seqinfo: 2 sequences from an unspecified genome; no seqlengths
#>
#> $gr2_rep2
#> GRanges object with 2 ranges and 1 metadata column:
#> seqnames ranges strand | score
#> <Rle> <IRanges> <Rle> | <numeric>
#> [1] chr1 1 + | 4
#> [2] chr2 2 + | 4
#> -------
#> seqinfo: 2 sequences from an unspecified genome; no seqlengths
#>
#--------------------------------------------------#
# Return spike-in reads
#--------------------------------------------------#
getSpikeInReads(grl, si_pattern = "spike", ncores = 1)
#> $gr1_rep1
#> GRanges object with 2 ranges and 1 metadata column:
#> seqnames ranges strand | score
#> <Rle> <IRanges> <Rle> | <numeric>
#> [1] spikechr1 3 + | 1
#> [2] spikechr2 4 + | 1
#> -------
#> seqinfo: 2 sequences from an unspecified genome; no seqlengths
#>
#> $gr2_rep1
#> GRanges object with 2 ranges and 1 metadata column:
#> seqnames ranges strand | score
#> <Rle> <IRanges> <Rle> | <numeric>
#> [1] spikechr1 3 + | 1
#> [2] spikechr2 4 + | 1
#> -------
#> seqinfo: 2 sequences from an unspecified genome; no seqlengths
#>
#> $gr1_rep2
#> GRanges object with 2 ranges and 1 metadata column:
#> seqnames ranges strand | score
#> <Rle> <IRanges> <Rle> | <numeric>
#> [1] spikechr1 3 + | 2
#> [2] spikechr2 4 + | 1
#> -------
#> seqinfo: 2 sequences from an unspecified genome; no seqlengths
#>
#> $gr2_rep2
#> GRanges object with 2 ranges and 1 metadata column:
#> seqnames ranges strand | score
#> <Rle> <IRanges> <Rle> | <numeric>
#> [1] spikechr1 3 + | 2
#> [2] spikechr2 4 + | 2
#> -------
#> seqinfo: 2 sequences from an unspecified genome; no seqlengths
#>