Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/content/tools/genomecov.rst
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ Usage and option summary
**-ibam** | BAM file as input for coverage. Each BAM alignment in A added to the total coverage for the genome.
| Use "stdin" or simply "-" if passing it with a UNIX pipe: For example:
| ``samtools view -b <BAM> | genomeCoverageBed -ibam stdin -g hg18.genome``
**-ibams** | The input list file each line is filename in BAM format, Report each file chrom by chrom and final all together.
**-d** Report the depth at each genome position with 1-based coordinates.
**-dz** Report the depth at each genome position with 0-based coordinates.
**-bg** Report depth in BedGraph format. For details, see: http://genome.ucsc.edu/goldenPath/help/bedgraph.html
Expand Down
207 changes: 140 additions & 67 deletions src/genomeCoverageBed/genomeCoverageBed.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ Licenced under the GNU General Public License 2.0 license.
#include "genomeCoverageBed.h"


BedGenomeCoverage::BedGenomeCoverage(string bedFile, string genomeFile,
BedGenomeCoverage::BedGenomeCoverage(vector<string> bedFiles, string genomeFile,
bool eachBase, bool startSites,
bool bedGraph, bool bedGraphAll,
int max, float scale,
Expand All @@ -23,7 +23,7 @@ BedGenomeCoverage::BedGenomeCoverage(string bedFile, string genomeFile,
bool eachBaseZeroBased,
bool add_gb_track_line, string gb_track_line_opts) {

_bedFile = bedFile;
_bedFiles = bedFiles;
_genomeFile = genomeFile;
_eachBase = eachBase;
_eachBaseZeroBased = eachBaseZeroBased;
Expand All @@ -42,20 +42,23 @@ BedGenomeCoverage::BedGenomeCoverage(string bedFile, string genomeFile,
_gb_track_line_opts = gb_track_line_opts;
_currChromName = "";
_currChromSize = 0 ;
_currBedFile = 0 ;
_lastBedFile = -1 ;



if (_bamInput == false) {
_genome = new GenomeFile(genomeFile);
}

PrintTrackDefinitionLine();

if (_bamInput == false) {
string bedFile = _bedFiles.front();
_bed = new BedFile(bedFile);
CoverageBed();
}
else {
CoverageBam(_bedFile);
CoverageBam(_bedFiles);
}
}

Expand Down Expand Up @@ -98,7 +101,7 @@ void BedGenomeCoverage::StartNewChrom(const string& newChrom) {
// empty the previous chromosome and reserve new
std::vector<DEPTH>().swap(_currChromCoverage);

if (_visitedChromosomes.find(newChrom) != _visitedChromosomes.end()) {
if (_visitedChromosomes.find(newChrom) != _visitedChromosomes.end() && _currChromName != "") {
cerr << "Input error: Chromosome " << _currChromName
<< " found in non-sequential lines. This suggests that the input file is not sorted correctly." << endl;

Expand Down Expand Up @@ -143,6 +146,7 @@ void BedGenomeCoverage::AddBlockedCoverage(const vector<BED> &bedBlocks) {
}



void BedGenomeCoverage::CoverageBed() {

BED a;
Expand Down Expand Up @@ -194,87 +198,154 @@ void BedGenomeCoverage::CoverageBed() {

void BedGenomeCoverage::PrintFinalCoverage()
{
if (_currChromName.length() > 0) {
ReportChromCoverage(_currChromCoverage, _currChromSize,
_currChromName, _currChromDepthHist);
}


// process the results of the last chromosome.
ReportChromCoverage(_currChromCoverage, _currChromSize,
_currChromName, _currChromDepthHist);
if (_eachBase == false && _bedGraph == false && _bedGraphAll == false) {
ReportGenomeCoverage(_currChromDepthHist);
}
}


void BedGenomeCoverage::CoverageBam(string bamFile) {
void BedGenomeCoverage::CoverageBam(vector<string> &_bedFiles) {

ResetChromCoverage();

// open the BAM file
BamReader reader;
if (!reader.Open(bamFile)) {
cerr << "Failed to open BAM file " << bamFile << endl;
exit(1);
std::vector<string>::iterator bedFileIt = _bedFiles.begin();
std::vector<string>::iterator bedFileFend = _bedFiles.end();
// int nfile = _bedFiles.size();
// vector<int64_t> bedpos(nfile); // no seek tell in BamReader, later. need add function in Bamtools api and store readed position for speed up

for(bedFileIt = _bedFiles.begin(); bedFileIt != bedFileFend; bedFileIt++){
string bamFile = *bedFileIt;

// open the BAM file
BamReader reader;
if (!reader.Open(bamFile)) {
cerr << "Failed to open BAM file " << bamFile << endl;
exit(1);
}

// get header & reference information
string header = reader.GetHeaderText();
RefVector refs = reader.GetReferenceData();

for(unsigned int ichrom=0;ichrom < refs.size(); ichrom++){
_tovisitChromosomes.insert(refs.at(ichrom).RefName);
}
reader.Close();
}

// get header & reference information
string header = reader.GetHeaderText();
RefVector refs = reader.GetReferenceData();

// load the BAM header references into a BEDTools "genome file"
_genome = new GenomeFile(refs);
// convert each aligned BAM entry to BED
// and compute coverage on B
BamAlignment bam;
while (reader.GetNextAlignment(bam)) {
// skip if the read is unaligned
if (bam.IsMapped() == false)
continue;

// skip if we care about strands and the strand isn't what
// the user wanted
if ( (_filterByStrand == true) &&
((_requestedStrand == "-") != bam.IsReverseStrand()) )
continue;

// extract the chrom, start and end from the BAM alignment
string chrom(refs.at(bam.RefID).RefName);
CHRPOS start = bam.Position;
CHRPOS end = bam.GetEndPosition(false, false) - 1;

// are we on a new chromosome?
if ( chrom != _currChromName )
StartNewChrom(chrom);

// add coverage accordingly.
if (!_only_5p_end && !_only_3p_end) {
bedVector bedBlocks;
// we always want to split blocks when a D CIGAR op is found.
// if the user invokes -split, we want to also split on N ops.
if (_obeySplits) { // "D" true, "N" true
GetBamBlocks(bam, refs.at(bam.RefID).RefName, bedBlocks, true, true);
// iterative through chroms(refernce)
set<string>::iterator iterChrom;
for(iterChrom = _tovisitChromosomes.begin(); iterChrom != _tovisitChromosomes.end(); iterChrom++){
string _chromtoread = *iterChrom;

for(bedFileIt = _bedFiles.begin(); bedFileIt != bedFileFend; bedFileIt++){
_currBedFile ++;
string bamFile = *bedFileIt;

// open the BAM file
BamReader reader;
if (!reader.Open(bamFile)) {
cerr << "Failed to open BAM file " << bamFile << endl;
exit(1);
}
else { // "D" true, "N" false
GetBamBlocks(bam, refs.at(bam.RefID).RefName, bedBlocks, true, false);

// get header & reference information
string header = reader.GetHeaderText();
RefVector refs = reader.GetReferenceData();

// load the BAM header references into a BEDTools "genome file"
_genome = new GenomeFile(refs);

// skip if no chrom(reference) find in bam file
bool nochrominfile=0;
for(unsigned int irefid=0;irefid < refs.size(); irefid++){
string tmpchrom(refs.at(irefid).RefName);
if(_chromtoread == tmpchrom){
nochrominfile=1;
}
}
if(!nochrominfile){
if(reader.IsOpen()){
reader.Close();
}
continue;
}

// are we on a new chromosome? will reset Coverage
if ( _chromtoread.c_str() != _currChromName)
StartNewChrom(_chromtoread.c_str());

// convert each aligned BAM entry to BED
// and compute coverage on B
BamAlignment bam;
bool readedblock = 0;
while (reader.GetNextAlignment(bam)) {
// skip if the read is unaligned
if (bam.IsMapped() == false)
continue;

// skip if we care about strands and the strand isn't what
// the user wanted
if ( (_filterByStrand == true) &&
((_requestedStrand == "-") != bam.IsReverseStrand()) )
continue;

// extract the chrom, start and end from the BAM alignment
string chrom(refs.at(bam.RefID).RefName);

// skip rest of file if new chrom read over
if(chrom != _chromtoread && readedblock){
reader.Close();
break;
}
CHRPOS start = bam.Position;
CHRPOS end = bam.GetEndPosition(false, false) - 1;

// add coverage accordingly.
if (!_only_5p_end && !_only_3p_end) {
bedVector bedBlocks;
// we always want to split blocks when a D CIGAR op is found.
// if the user invokes -split, we want to also split on N ops.
if (_obeySplits) { // "D" true, "N" true
GetBamBlocks(bam, refs.at(bam.RefID).RefName, bedBlocks, true, true);
}
else { // "D" true, "N" false
GetBamBlocks(bam, refs.at(bam.RefID).RefName, bedBlocks, true, false);
}
AddBlockedCoverage(bedBlocks);
}
else if (_only_5p_end) {
int pos = ( !bam.IsReverseStrand() ) ? start : end;
AddCoverage(pos,pos);
}
else if (_only_3p_end) {
int pos = ( bam.IsReverseStrand() ) ? start : end;
AddCoverage(pos,pos);
}
// flag at least readed one for this chrom
if (!readedblock) {
readedblock = 1;
}
}

if(reader.IsOpen()){
// close the BAM
reader.Close();
}
AddBlockedCoverage(bedBlocks);
}
else if (_only_5p_end) {
int pos = ( !bam.IsReverseStrand() ) ? start : end;
AddCoverage(pos,pos);
}
else if (_only_3p_end) {
int pos = ( bam.IsReverseStrand() ) ? start : end;
AddCoverage(pos,pos);
}
}
// close the BAM
reader.Close();
PrintFinalCoverage();
}


void BedGenomeCoverage::ReportChromCoverage(const vector<DEPTH> &chromCov, const int &chromSize, const string &chrom, chromHistMap &chromDepthHist) {

chromHistMap tmpchromDepthHist; // use tmpchromDepthHist so not accumulate when report everyfile.
if (_eachBase) {
int depth = 0; // initialize the depth
int offset = (_eachBaseZeroBased)?0:1;
Expand Down Expand Up @@ -303,15 +374,17 @@ void BedGenomeCoverage::ReportChromCoverage(const vector<DEPTH> &chromCov, const
// maximum bin requested, then readjust the depth to be the max
if (depth >= _max) {
chromDepthHist[chrom][_max]++;
tmpchromDepthHist[chrom][_max]++;
}
else {
chromDepthHist[chrom][depth]++;
tmpchromDepthHist[chrom][depth]++;
}
depth = depth - chromCov[pos].ends;
}
// report the histogram for each chromosome
histMap::const_iterator depthIt = chromDepthHist[chrom].begin();
histMap::const_iterator depthEnd = chromDepthHist[chrom].end();
histMap::const_iterator depthIt = tmpchromDepthHist[chrom].begin();
histMap::const_iterator depthEnd = tmpchromDepthHist[chrom].end();
for (; depthIt != depthEnd; ++depthIt) {
int depth = depthIt->first;
unsigned int numBasesAtDepth = depthIt->second;
Expand Down
10 changes: 7 additions & 3 deletions src/genomeCoverageBed/genomeCoverageBed.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ class BedGenomeCoverage {
public:

// constructor
BedGenomeCoverage(string bedFile, string genomeFile,
BedGenomeCoverage(vector<string> bedFiles, string genomeFile,
bool eachBase, bool startSites,
bool bedGraph, bool bedGraphAll,
int max, float scale,
Expand All @@ -57,7 +57,7 @@ class BedGenomeCoverage {
private:

// data (parms)
string _bedFile;
vector<string> _bedFiles;
string _genomeFile;
bool _bamInput;
bool _eachBase;
Expand All @@ -82,14 +82,18 @@ class BedGenomeCoverage {
chromDepthMap _chromCov;
string _currChromName ;
vector<DEPTH> _currChromCoverage;
vector<DEPTH> _allChromCoverage;
chromHistMap _currChromDepthHist;
int _currChromSize ;
unsigned int _currBedFile ;
int _lastBedFile ;
set<string> _visitedChromosomes;
set<string> _tovisitChromosomes;


// methods
void CoverageBed();
void CoverageBam(string bamFile);
void CoverageBam(vector<string> &_bamFiles);
void LoadBamHeaderIntoGenomeFile(const string &bamFile);
void ReportChromCoverage(const vector<DEPTH> &, const int &chromSize, const string &chrom, chromHistMap&);
void ReportGenomeCoverage(chromHistMap &chromDepthHist);
Expand Down
Loading