From bf9ed14da9d20311d5ee41a07c4d4f68021bc913 Mon Sep 17 00:00:00 2001 From: WardDeb Date: Fri, 4 Apr 2025 15:40:13 +0200 Subject: [PATCH 01/20] ctrl-c in computematrix too --- pydeeptools/deeptools/computeMatrix2.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pydeeptools/deeptools/computeMatrix2.py b/pydeeptools/deeptools/computeMatrix2.py index 19a32af98d..a2b123e7e6 100644 --- a/pydeeptools/deeptools/computeMatrix2.py +++ b/pydeeptools/deeptools/computeMatrix2.py @@ -5,6 +5,7 @@ from deeptools import parserCommon from importlib.metadata import version from deeptools.hp import r_computematrix +import signal def parse_arguments(args=None): parser = \ @@ -382,7 +383,7 @@ def main(args=None): 'unscaled 5 prime': args.unscaled5prime, 'unscaled 3 prime': args.unscaled3prime } - # Assert all regions and scores exist + signal.signal(signal.SIGINT, signal.SIG_DFL) r_computematrix( args.command, args.regionsFileName, From 6260e6bdddbc3c06ec974fcb54930ef9e6c8ca5a Mon Sep 17 00:00:00 2001 From: WardDeb Date: Wed, 16 Apr 2025 15:29:36 +0200 Subject: [PATCH 02/20] bugfixes and alignment to tests --- .../test/test_data/testA_offset-1.bw | Bin 720 -> 0 bytes src/bamcompare.rs | 55 ++++++++++++++---- src/bamcoverage.rs | 30 +++++++--- src/calc.rs | 24 +++++--- src/normalization.rs | 40 ++++++++++--- 5 files changed, 114 insertions(+), 35 deletions(-) delete mode 100644 pydeeptools/deeptools/test/test_data/testA_offset-1.bw diff --git a/pydeeptools/deeptools/test/test_data/testA_offset-1.bw b/pydeeptools/deeptools/test/test_data/testA_offset-1.bw deleted file mode 100644 index 8bbc7c3d799af19e2dd555ef74112eac1bdc3809..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 720 zcmY%U)8E0uz{n86$N&cOp!72+4H1Qq1~9P(gy;z1IZT nb?;aeU@n500<#TfnhZ>U1;Stix(NngYT}@dl!8jA0n;)7379_$ diff --git a/src/bamcompare.rs b/src/bamcompare.rs index 6358f2f189..959d31ad11 100644 --- a/src/bamcompare.rs +++ b/src/bamcompare.rs @@ -10,7 +10,7 @@ use bigtools::{Value}; use crate::filehandler::{bam_ispaired, write_covfile, is_bed_or_gtf, read_bedfile}; use crate::covcalc::{bam_pileup, parse_regions, TempZip, region_divider, Region}; use crate::filtering::Alignmentfilters; -use crate::normalization::scale_factor_bamcompare; +use crate::normalization::{scale_factor_bamcompare}; use crate::calc::{median, calc_ratio}; use tempfile::{TempPath}; @@ -25,8 +25,11 @@ pub fn r_bamcompare( norm: &str, effective_genome_size: u64, scalefactorsmethod: &str, + given_sf1: f32, + given_sf2: f32, operation: &str, - pseudocount: f32, + pseudocount1: f32, + pseudocount2: f32, // filtering options extendreads: bool, // if 0, no extension extendreadslen: u32, // length of extension (0 if PE or if not extending) @@ -39,6 +42,8 @@ pub fn r_bamcompare( maxfraglen: u32, nproc: usize, _ignorechr: Py, + skip_non_covered_regions: bool, + skip_zero_over_zero: bool, binsize: u32, supregion: &str, verbose: bool, @@ -153,8 +158,22 @@ pub fn r_bamcompare( println!("{}\t{}\t{}\t{}\t{}\t{}", covcalcs[1].bamfile, covcalcs[1].ispe, covcalcs[1].mapped, covcalcs[1].unmapped, covcalcs[1].readlen, covcalcs[1].fraglen); } // Calculate scale factors. - let sf = scale_factor_bamcompare(scalefactorsmethod, covcalcs[0].mapped, covcalcs[1].mapped, binsize, effective_genome_size, norm); - println!("scale factor1 = {}, scale factor2 = {}", sf.0, sf.1); + + let mut sf = scale_factor_bamcompare( + scalefactorsmethod, + covcalcs[0].mapped, covcalcs[1].mapped, + binsize, effective_genome_size, norm, + covcalcs[0].readlen, covcalcs[1].readlen, + covcalcs[0].fraglen, covcalcs[1].fraglen, + ); + if given_sf1 != 0.0 || given_sf2 != 0.0 { + if verbose { + println!("Using given scale factors: {} and {}", given_sf1, given_sf2); + } + sf = (given_sf1, given_sf2); + } else if verbose { + println!("scale factor1 = {}, scale factor2 = {}", sf.0, sf.1); + } // Extract both vecs of TempPaths into a single vector let its = vec![ @@ -171,7 +190,7 @@ pub fn r_bamcompare( .flat_map(|c| { let readers: Vec<_> = c.into_iter().map(|x| BufReader::new(File::open(x).unwrap()).lines()).collect(); let temp_zip = TempZip { iterators: readers }; - temp_zip.into_iter().map(|mut _l| { + temp_zip.into_iter().filter_map(|mut _l| { let lines: Vec<_> = _l .iter_mut() .map(|x| x.as_mut().unwrap()) @@ -183,8 +202,16 @@ pub fn r_bamcompare( assert_eq!(lines[0].1, lines[1].1, "Error: Start position mismatch in bam files. {} != {}", lines[0].1, lines[1].1); assert_eq!(lines[0].2, lines[1].2, "Error: End position mismatch in bam files. {} != {}", lines[0].2, lines[1].2); // Calculate the coverage. - let cov = calc_ratio(lines[0].3, lines[1].3, &sf.0, &sf.1, &pseudocount, operation); - (lines[0].0.clone(), Value { start: lines[0].1, end: lines[0].2, value: cov }) + if skip_zero_over_zero && lines[0].3 == 0.0 && lines[1].3 == 0.0 { + return None; + } else if skip_non_covered_regions && lines[0].3 == 0.0 { + return None; + } else if skip_non_covered_regions && lines[1].3 == 0.0 { + return None; + } else { + let cov = calc_ratio(lines[0].3, lines[1].3, &sf.0, &sf.1, &pseudocount1, &pseudocount2, operation); + Some((lines[0].0.clone(), Value { start: lines[0].1, end: lines[0].2, value: cov })) + } }).coalesce(|p, c| { if p.1.value == c.1.value && p.0 == c.0 { Ok((p.0, Value {start: p.1.start, end: c.1.end, value: p.1.value})) @@ -200,7 +227,7 @@ pub fn r_bamcompare( .flat_map(|c| { let readers: Vec<_> = c.into_iter().map(|x| BufReader::new(File::open(x).unwrap()).lines()).collect(); let temp_zip = TempZip { iterators: readers }; - temp_zip.into_iter().map(|mut _l| { + temp_zip.into_iter().filter_map(|mut _l| { let lines: Vec<_> = _l .iter_mut() .map(|x| x.as_mut().unwrap()) @@ -212,8 +239,16 @@ pub fn r_bamcompare( assert_eq!(lines[0].1, lines[1].1, "Error: Start position mismatch in bam files. {} != {}", lines[0].1, lines[1].1); assert_eq!(lines[0].2, lines[1].2, "Error: End position mismatch in bam files. {} != {}", lines[0].2, lines[1].2); // Calculate the coverage. - let cov = calc_ratio(lines[0].3, lines[1].3, &sf.0, &sf.1, &pseudocount, operation); - (lines[0].0.clone(), Value { start: lines[0].1, end: lines[0].2, value: cov }) + if skip_zero_over_zero && lines[0].3 == 0.0 && lines[1].3 == 0.0 { + return None; + } else if skip_non_covered_regions && lines[0].3 == 0.0 { + return None; + } else if skip_non_covered_regions && lines[1].3 == 0.0 { + return None; + } else { + let cov = calc_ratio(lines[0].3, lines[1].3, &sf.0, &sf.1, &pseudocount1, &pseudocount2, operation); + Some((lines[0].0.clone(), Value { start: lines[0].1, end: lines[0].2, value: cov })) + } }) }); write_covfile(lines, ofile, ofiletype, chromsizes); diff --git a/src/bamcoverage.rs b/src/bamcoverage.rs index 688d2ec261..4e6c4505d1 100644 --- a/src/bamcoverage.rs +++ b/src/bamcoverage.rs @@ -33,7 +33,7 @@ pub fn r_bamcoverage( filterrnastrand: &str, // forward, reverse or 'None' blacklist: &str, // path to blacklist filename, or 'None' _ignorechr: Py, // list of chromosomes to ignore. Is empty if none. - _skipnoncovregions: bool, + skipnoncovregions: bool, _smoothlength: u32, // 0 = no smoothing, else it's a strictly larger then binsize binsize: u32, // filtering options @@ -89,7 +89,6 @@ pub fn r_bamcoverage( println!("Chromosomes to ignore for normalization: {:?}", ignorechr); } - // if mnase, set the min / max fragment lengths if these are not set. if mnase { if minfraglen == 0 { @@ -169,13 +168,20 @@ pub fn r_bamcoverage( ) }); - let readlen = median(readlen); + let mut readlen = median(readlen); + if filters.extendreads { + if verbose { + println!("extend reads option on, overriding readlen from {} to {}", readlen, filters.extendreadslen); + } + readlen = filters.extendreadslen as f32; + } let fraglen = median(fraglen); if verbose { println!("Read stats with ignorechr: {:?}", ignorechr); println!("Mapped: {} Unmapped: {}", mapped, _unmapped); println!("Readlen: {}, Fraglen: {}", readlen, fraglen); } + let sf = scale_factor( norm, mapped, @@ -191,15 +197,21 @@ pub fn r_bamcoverage( let lines = bg.into_iter().flat_map( |bg| { let reader = BufReader::new(File::open(bg).unwrap()); - reader.lines().map( + reader.lines().filter_map( |l| { let l = l.unwrap(); let fields: Vec<&str> = l.split('\t').collect(); - (fields[0].to_string(), Value { - start: fields[1].parse::().unwrap(), - end: fields[2].parse::().unwrap(), - value: fields[3].parse::().unwrap() * sf - }) + if skipnoncovregions && fields[3] == "0" { + None + } else { + Some( + (fields[0].to_string(), Value { + start: fields[1].parse::().unwrap(), + end: fields[2].parse::().unwrap(), + value: fields[3].parse::().unwrap() * sf + }) + ) + } } ) } diff --git a/src/calc.rs b/src/calc.rs index 75239ad1e7..97fe36393b 100644 --- a/src/calc.rs +++ b/src/calc.rs @@ -124,25 +124,26 @@ pub fn calc_ratio( cov2: f32, sf1: &f32, sf2: &f32, - pseudocount: &f32, + pseudocount1: &f32, + pseudocount2: &f32, operation: &str ) -> f32 { // Pseudocounts are only used in log2 and ratio operations // First scale factor is applied, then pseudocount, if applicable. match operation { "log2" => { - let num: f32 = (cov1 * *sf1) + *pseudocount; - let den: f32 = (cov2 * *sf2) + *pseudocount; + let num: f32 = (cov1 * *sf1) + *pseudocount1; + let den: f32 = (cov2 * *sf2) + *pseudocount2; return (num / den).log2(); } "ratio" => { - let num: f32 = (cov1 * *sf1) + *pseudocount; - let den: f32 = (cov2 * *sf2) + *pseudocount; + let num: f32 = (cov1 * *sf1) + *pseudocount1; + let den: f32 = (cov2 * *sf2) + *pseudocount2; return num / den; } "reciprocal_ratio" => { - let num: f32 = (cov1 * *sf1) + *pseudocount; - let den: f32 = (cov2 * *sf2) + *pseudocount; + let num: f32 = (cov1 * *sf1) + *pseudocount1; + let den: f32 = (cov2 * *sf2) + *pseudocount2; let ratio: f32 = num / den; if ratio >= 1.0 { return den / num; @@ -150,10 +151,15 @@ pub fn calc_ratio( return -num / den; } } + "subtract" => { + let num: f32 = (cov1 * *sf1) + *pseudocount1; + let den: f32 = (cov2 * *sf2) + *pseudocount2; + return num - den; + } _ => { // No operation is never allowed (on the py arg level, so just default to log2) - let num: f32 = (cov1 * *sf1) + *pseudocount; - let den: f32 = (cov2 * *sf2) + *pseudocount; + let num: f32 = (cov1 * *sf1) + *pseudocount1; + let den: f32 = (cov2 * *sf2) + *pseudocount2; return (num / den).log2(); } } diff --git a/src/normalization.rs b/src/normalization.rs index 228fd0e340..1fa0907389 100644 --- a/src/normalization.rs +++ b/src/normalization.rs @@ -14,6 +14,7 @@ pub fn scale_factor( return scalefactor; } let mut scale_factor = 1.0; + match norm_method { "RPKM" => { // RPKM = # reads per tile / total reads (millions) * tile length (kb) @@ -35,6 +36,7 @@ pub fn scale_factor( "RPGC" => { // RPGC = mapped reads * fragment length / effective genome size let tmp_scalefactor = (mapped as f32 * readlen as f32) / effective_genome_size as f32; + println!("Tmp scale factor: {}", tmp_scalefactor); scale_factor *= 1.0 / tmp_scalefactor; } _ => {} @@ -49,9 +51,13 @@ pub fn scale_factor_bamcompare( norm_method: &str, mapped_bam1: u32, mapped_bam2: u32, - _binsize: u32, - _effective_genome_size: u64, - _norm: &str + binsize: u32, + effective_genome_size: u64, + norm: &str, + readlen_bam1: f32, + readlen_bam2: f32, + fraglen_bam1: f32, + fraglen_bam2: f32, ) -> (f32, f32) { return match norm_method { "readCount" => { @@ -60,12 +66,32 @@ pub fn scale_factor_bamcompare( let scale_factor2 = min as f32 / mapped_bam2 as f32; (scale_factor1, scale_factor2) } - "SES" => { - // to be implemented - (1.0, 1.0) + "None" => { + // Default to scale factor calculation with 'norm'. + let scale_factor1 = scale_factor( + norm, + mapped_bam1, + binsize, + effective_genome_size, + readlen_bam1, + fraglen_bam1, + 1.0, + &false + ); + let scale_factor2 = scale_factor( + norm, + mapped_bam2, + binsize, + effective_genome_size, + readlen_bam2, + fraglen_bam2, + 1.0, + &false + ); + (scale_factor1, scale_factor2) } _ => { - (1.0, 1.0) + panic!("ScaleFactorsMethod should either be 'readCount' or 'None'."); } } } From 4d663bf75decc32bb08532faccd23fe3f6d6d088 Mon Sep 17 00:00:00 2001 From: WardDeb Date: Wed, 16 Apr 2025 15:31:51 +0200 Subject: [PATCH 03/20] drop exactscaling, ignoreDuplicates from arguments, workaround for old python code --- pydeeptools/deeptools/bamCompare.py | 2 +- pydeeptools/deeptools/bamCoverage.py | 6 ++-- pydeeptools/deeptools/getScaleFactor.py | 39 ++++++++++++------------- 3 files changed, 23 insertions(+), 24 deletions(-) diff --git a/pydeeptools/deeptools/bamCompare.py b/pydeeptools/deeptools/bamCompare.py index 223bc06c95..03c50a9e8b 100644 --- a/pydeeptools/deeptools/bamCompare.py +++ b/pydeeptools/deeptools/bamCompare.py @@ -295,7 +295,7 @@ def main(args=None): extendReads=args.extendReads, blackListFileName=args.blackListFileName, minMappingQuality=args.minMappingQuality, - ignoreDuplicates=args.ignoreDuplicates, + ignoreDuplicates=False, center_read=args.centerReads, zerosToNans=args.skipNonCoveredRegions, skipZeroOverZero=args.skipZeroOverZero, diff --git a/pydeeptools/deeptools/bamCoverage.py b/pydeeptools/deeptools/bamCoverage.py index d812493509..be0074bec4 100644 --- a/pydeeptools/deeptools/bamCoverage.py +++ b/pydeeptools/deeptools/bamCoverage.py @@ -195,7 +195,7 @@ def main(args=None): numberOfProcessors=args.numberOfProcessors, extendReads=args.extendReads, minMappingQuality=args.minMappingQuality, - ignoreDuplicates=args.ignoreDuplicates, + ignoreDuplicates=False, # ignore duplicates is no longer available. center_read=args.centerReads, zerosToNans=args.skipNonCoveredRegions, samFlag_include=args.samFlagInclude, @@ -222,7 +222,7 @@ def main(args=None): numberOfProcessors=args.numberOfProcessors, extendReads=args.extendReads, minMappingQuality=args.minMappingQuality, - ignoreDuplicates=args.ignoreDuplicates, + ignoreDuplicates=False, # ignore duplicates is no longer available. center_read=args.centerReads, zerosToNans=args.skipNonCoveredRegions, samFlag_include=args.samFlagInclude, @@ -242,7 +242,7 @@ def main(args=None): numberOfProcessors=args.numberOfProcessors, extendReads=args.extendReads, minMappingQuality=args.minMappingQuality, - ignoreDuplicates=args.ignoreDuplicates, + ignoreDuplicates=False, # ignore duplicates is no longer available. center_read=args.centerReads, zerosToNans=args.skipNonCoveredRegions, samFlag_include=args.samFlagInclude, diff --git a/pydeeptools/deeptools/getScaleFactor.py b/pydeeptools/deeptools/getScaleFactor.py index 541b4febdc..17923f3e9b 100644 --- a/pydeeptools/deeptools/getScaleFactor.py +++ b/pydeeptools/deeptools/getScaleFactor.py @@ -59,24 +59,24 @@ def getFractionKept_worker(chrom, start, end, bamFile, args, offset): # get rid of duplicate reads that have same position on each of the # pairs - if args.ignoreDuplicates: - # Assuming more or less concordant reads, use the fragment bounds, otherwise the start positions - if tLen >= 0: - s = read.pos - e = s + tLen - else: - s = read.pnext - e = s - tLen - if read.reference_id != read.next_reference_id: - e = read.pnext - if lpos is not None and lpos == read.reference_start \ - and (s, e, read.next_reference_id, read.is_reverse) in prev_pos: - filtered += 1 - continue - if lpos != read.reference_start: - prev_pos.clear() - lpos = read.reference_start - prev_pos.add((s, e, read.next_reference_id, read.is_reverse)) + # if args.ignoreDuplicates: + # # Assuming more or less concordant reads, use the fragment bounds, otherwise the start positions + # if tLen >= 0: + # s = read.pos + # e = s + tLen + # else: + # s = read.pnext + # e = s - tLen + # if read.reference_id != read.next_reference_id: + # e = read.pnext + # if lpos is not None and lpos == read.reference_start \ + # and (s, e, read.next_reference_id, read.is_reverse) in prev_pos: + # filtered += 1 + # continue + # if lpos != read.reference_start: + # prev_pos.clear() + # lpos = read.reference_start + # prev_pos.add((s, e, read.next_reference_id, read.is_reverse)) # If filterRNAstrand is in args, then filter accordingly # This is very similar to what's used in the get_fragment_from_read function in the filterRnaStrand class @@ -146,8 +146,7 @@ def fraction_kept(args, stats): num_needed_to_sample = 0.1 * bam_mapped else: num_needed_to_sample = 1000000 - if args.exactScaling: - num_needed_to_sample = bam_mapped + num_needed_to_sample = bam_mapped if num_needed_to_sample == bam_mapped: distanceBetweenBins = 55000 if args.ignoreForNormalization: From b0df1732a379ba6e4d1919cd163eb2046d2b2bad Mon Sep 17 00:00:00 2001 From: WardDeb Date: Wed, 16 Apr 2025 15:32:13 +0200 Subject: [PATCH 04/20] deal with different pseudocount / scalefactor options on CLI --- pydeeptools/deeptools/bamCompare2.py | 98 +++++++++------------------- 1 file changed, 31 insertions(+), 67 deletions(-) diff --git a/pydeeptools/deeptools/bamCompare2.py b/pydeeptools/deeptools/bamCompare2.py index ff80995821..c9277d39f4 100644 --- a/pydeeptools/deeptools/bamCompare2.py +++ b/pydeeptools/deeptools/bamCompare2.py @@ -2,6 +2,7 @@ from deeptools import parserCommon from deeptools.hp import r_bamcompare import signal +import sys def parseArguments(): parentParser = parserCommon.getParentArgParse() @@ -73,7 +74,7 @@ def getOptionalArgs(): 'for sequencing depth differences between the samples. ' 'As an alternative, this can be set to None and an option from ' '--normalizeUsing can be used. (Default: %(default)s)', - choices=['readCount', 'SES', 'None'], + choices=['readCount', 'None'], default='readCount') optional.add_argument('--sampleLength', '-l', @@ -126,7 +127,6 @@ def getOptionalArgs(): 'values (the first value is used as the numerator ' 'pseudocount and the second the denominator pseudocount). (Default: %(default)s)', default=[1], - type=float, nargs='+', action=parserCommon.requiredLength(1, 2), required=False) @@ -158,70 +158,8 @@ def process_args(args=None): if not args.ignoreForNormalization: args.ignoreForNormalization = [] - if not isinstance(args.pseudocount, list): - args.pseudocount = [args.pseudocount] - - if len(args.pseudocount) == 1: - args.pseudocount *= 2 - return args -# get_scale_factors function is used for scaling in bamCompare -# while get_scale_factor is used for depth normalization - - -def get_scale_factors(args, statsList, mappedList): - - if args.scaleFactors: - scale_factors = list(map(float, args.scaleFactors.split(":"))) - elif args.scaleFactorsMethod == 'SES': - scalefactors_dict = estimateScaleFactor( - [args.bamfile1, args.bamfile2], - args.sampleLength, args.numberOfSamples, - 1, - mappingStatsList=mappedList, - blackListFileName=args.blackListFileName, - numberOfProcessors=args.numberOfProcessors, - verbose=args.verbose, - chrsToSkip=args.ignoreForNormalization) - - scale_factors = scalefactors_dict['size_factors'] - - if args.verbose: - print("Size factors using SES: {}".format(scale_factors)) - print("%s regions of size %s where used " % - (scalefactors_dict['sites_sampled'], - args.sampleLength)) - - print("ignoring filtering/blacklists, size factors if the number of mapped " - "reads would have been used:") - print(tuple( - float(min(mappedList)) / np.array(mappedList))) - - elif args.scaleFactorsMethod == 'readCount': - # change the scaleFactor to 1.0 - args.scaleFactor = 1.0 - # get num of kept reads for bam file 1 - args.bam = args.bamfile1 - bam1_mapped, _ = get_num_kept_reads(args, statsList[0]) - # get num of kept reads for bam file 2 - args.bam = args.bamfile2 - bam2_mapped, _ = get_num_kept_reads(args, statsList[1]) - - mapped_reads = [bam1_mapped, bam2_mapped] - - # new scale_factors (relative to min of two bams) - scale_factors = float(min(bam1_mapped, bam2_mapped)) / np.array(mapped_reads) - if args.verbose: - print("Size factors using total number " - "of mapped reads: {}".format(scale_factors)) - - elif args.scaleFactorsMethod == 'None': - scale_factors = None - - return scale_factors - - def main(args=None): """ The algorithm is composed of two steps. @@ -274,8 +212,29 @@ def main(args=None): print("Please only provide one blacklist file.") sys.exit() args.blackListFileName = args.blackListFileName[0] - - args.pseudocount = 1 + if args.scaleFactors: + if len(args.scaleFactors.split(":")) == 2: + args.sf1 = float(args.scaleFactors.split(":")[0]) + args.sf2 = float(args.scaleFactors.split(":")[0]) + elif len(args.scaleFactors.split(":")) == 1: + args.sf1 = float(args.scaleFactors.split(":")[0]) + args.sf2 = float(args.scaleFactors.split(":")[0]) + else: + print("Please provide one scale factor, or two by a ':'.") + sys.exit() + else: + args.sf1 = 0.0 + args.sf2 = 0.0 + if len(args.pseudocount) == 1: + args.pseudocount1 = float(args.pseudocount[0]) + args.pseudocount2 = float(args.pseudocount[0]) + elif len(args.pseudocount) == 2: + args.pseudocount1 = float(args.pseudocount[0]) + args.pseudocount2 = float(args.pseudocount[1]) + else: + print(f"Pseudocounts should be either one or two values. Not {args.pseudocount}") + sys.exit() + print(args) signal.signal(signal.SIGINT, signal.SIG_DFL) r_bamcompare( args.bamfile1, # bam file 1 @@ -285,8 +244,11 @@ def main(args=None): args.normalizeUsing, # normalization method args.effectiveGenomeSize, # effective genome size args.scaleFactorsMethod, # scaling method + args.sf1, + args.sf2, args.operation, - args.pseudocount, + args.pseudocount1, + args.pseudocount2, args.extendReads, args.extendReadsLen, args.centerReads, @@ -298,6 +260,8 @@ def main(args=None): args.maxFragmentLength, args.numberOfProcessors, # threads args.ignoreForNormalization, + args.skipNonCoveredRegions, + args.skipZeroOverZero, args.binSize, # bin size args.region, # regions args.verbose, # verbose From 48c0e2a951f8f7fbd2d5e1fff45c3951e7a1f352 Mon Sep 17 00:00:00 2001 From: WardDeb Date: Wed, 16 Apr 2025 15:32:24 +0200 Subject: [PATCH 05/20] drop ignoreduplicates from parser --- pydeeptools/deeptools/parserCommon.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pydeeptools/deeptools/parserCommon.py b/pydeeptools/deeptools/parserCommon.py index 240259bccd..a65f68667b 100755 --- a/pydeeptools/deeptools/parserCommon.py +++ b/pydeeptools/deeptools/parserCommon.py @@ -79,13 +79,13 @@ def read_options(): default=False, metavar="INT bp") - group.add_argument('--ignoreDuplicates', - help='If set, reads that have the same orientation ' - 'and start position will be considered only ' - 'once. If reads are paired, the mate\'s position ' - 'also has to coincide to ignore a read.', - action='store_true' - ) + # group.add_argument('--ignoreDuplicates', + # help='If set, reads that have the same orientation ' + # 'and start position will be considered only ' + # 'once. If reads are paired, the mate\'s position ' + # 'also has to coincide to ignore a read.', + # action='store_true' + # ) group.add_argument('--minMappingQuality', metavar='INT', From c41987500457b373be246f7075ee0bf00cd6b763 Mon Sep 17 00:00:00 2001 From: WardDeb Date: Wed, 16 Apr 2025 15:32:34 +0200 Subject: [PATCH 06/20] reflect changes in changelog --- docs/content/changelog.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/content/changelog.rst b/docs/content/changelog.rst index 34fe18d26d..8cc1b75624 100644 --- a/docs/content/changelog.rst +++ b/docs/content/changelog.rst @@ -41,6 +41,8 @@ Core * normalization - Exactscaling is no longer an option, it's always performed. + - SES option in bamCompare mode is no longer available. + - blackList filtering is now performed on a position-based level. Meaning reads that overlap partially with the blacklist can still contribute to the signal. * alignmentSieve - options label, smartLabels, genomeChunkLength are removed. From a0935c31f1c9c89ad5d1d40f843e614fe92d1145 Mon Sep 17 00:00:00 2001 From: WardDeb Date: Wed, 16 Apr 2025 15:32:46 +0200 Subject: [PATCH 07/20] update pytests bamCoverage, bamCompare --- .../test/test_bamCoverage_and_bamCompare.py | 297 +++++++++++------- 1 file changed, 188 insertions(+), 109 deletions(-) diff --git a/pydeeptools/deeptools/test/test_bamCoverage_and_bamCompare.py b/pydeeptools/deeptools/test/test_bamCoverage_and_bamCompare.py index ac1f23ce20..0a084ebf2a 100644 --- a/pydeeptools/deeptools/test/test_bamCoverage_and_bamCompare.py +++ b/pydeeptools/deeptools/test/test_bamCoverage_and_bamCompare.py @@ -1,6 +1,5 @@ -import deeptools.bamCoverage as bam_cov -import deeptools.bamCompare as bam_comp -import deeptools.getScaleFactor as gs +import deeptools.bamCoverage2 as bam_cov +import deeptools.bamCompare2 as bam_comp import os.path import filecmp from os import unlink @@ -37,7 +36,8 @@ def test_bam_coverage_arguments(): Test minimal command line args for bamCoverage """ outfile = '/tmp/test_file.bg' - for fname in [BAMFILE_B, CRAMFILE_B]: + #for fname in [BAMFILE_B, CRAMFILE_B]: + for fname in [BAMFILE_B]: args = "--bam {} -o {} --outFileFormat bedgraph".format(fname, outfile).split() bam_cov.main(args) @@ -51,7 +51,8 @@ def test_bam_coverage_arguments(): def test_bam_coverage_extend(): outfile = '/tmp/test_file.bg' - for fname in [BAMFILE_B, CRAMFILE_B]: + #for fname in [BAMFILE_B, CRAMFILE_B]: + for fname in [BAMFILE_B]: args = "-b {} -o {} --extendReads 100 --outFileFormat bedgraph".format(fname, outfile).split() bam_cov.main(args) _foo = open(outfile, 'r') @@ -65,8 +66,9 @@ def test_bam_coverage_extend(): def test_bam_coverage_extend_and_normalizeUsingRPGC(): outfile = '/tmp/test_file.bg' - for fname in [BAMFILE_B, CRAMFILE_B]: - args = "-b {} -o {} --normalizeUsing RPGC --effectiveGenomeSize 200 --extendReads 100 " \ + #for fname in [BAMFILE_B, CRAMFILE_B]: + for fname in [BAMFILE_B]: + args = "-b {} -o {} --normalizeUsing RPGC --effectiveGenomeSize 200 --extendReads 100 --verbose " \ "--outFileFormat bedgraph".format(fname, outfile).split() bam_cov.main(args) _foo = open(outfile, 'r') @@ -81,7 +83,8 @@ def test_bam_coverage_extend_and_normalizeUsingRPGC(): def test_bam_coverage_skipnas(): outfile = '/tmp/test_file.bg' - for fname in [BAMFILE_B, CRAMFILE_B]: + #for fname in [BAMFILE_B, CRAMFILE_B]: + for fname in [BAMFILE_B]: args = "--bam {} -o {} --outFileFormat bedgraph --skipNAs".format(fname, outfile).split() bam_cov.main(args) @@ -93,18 +96,19 @@ def test_bam_coverage_skipnas(): unlink(outfile) -def test_bam_coverage_filtering(): - outfile = '/tmp/test_file.bg' - for fname in [BAMFILE_B, CRAMFILE_B]: - args = "--bam {} -o {} --outFileFormat bedgraph --ignoreDuplicates --verbose".format(fname, outfile).split() - bam_cov.main(args) +# def test_bam_coverage_filtering(): +# outfile = '/tmp/test_file.bg' +# #for fname in [BAMFILE_B, CRAMFILE_B]: +# for fname in [BAMFILE_B]: +# args = "--bam {} -o {} --outFileFormat bedgraph --ignoreDuplicates --verbose".format(fname, outfile).split() +# bam_cov.main(args) - _foo = open(outfile, 'r') - resp = _foo.readlines() - _foo.close() - expected = ['3R\t0\t50\t0\n', '3R\t50\t200\t1\n'] - assert resp == expected, "{} != {}".format(resp, expected) - unlink(outfile) +# _foo = open(outfile, 'r') +# resp = _foo.readlines() +# _foo.close() +# expected = ['3R\t0\t50\t0\n', '3R\t50\t200\t1\n'] +# assert resp == expected, "{} != {}".format(resp, expected) +# unlink(outfile) def test_bam_compare_arguments(): @@ -114,7 +118,8 @@ def test_bam_compare_arguments(): is 1.0 for all bins. """ outfile = '/tmp/test_file.bg' - for fname in [BAMFILE_B, CRAMFILE_B]: + #for fname in [BAMFILE_B, CRAMFILE_B]: + for fname in [BAMFILE_B]: args = "--bamfile1 {} --bamfile2 {} " \ "-o {} -p 1 --outFileFormat bedgraph --operation ratio".format(fname, fname, outfile).split() bam_comp.main(args) @@ -132,8 +137,9 @@ def test_bam_compare_diff_files(): Test with two different files """ outfile = '/tmp/test_file.bg' - for A, B in [(BAMFILE_A, BAMFILE_B), (CRAMFILE_A, CRAMFILE_B)]: - args = "--bamfile1 {} --bamfile2 {} --scaleFactors 1:1 --operation subtract " \ + #for A, B in [(BAMFILE_A, BAMFILE_B), (CRAMFILE_A, CRAMFILE_B)]: + for A, B in [(BAMFILE_A, BAMFILE_B)]: + args = "--bamfile1 {} --bamfile2 {} --scaleFactors 1:1 --operation subtract --verbose " \ "-o {} -p 1 --outFileFormat bedgraph".format(A, B, outfile).split() bam_comp.main(args) @@ -168,47 +174,47 @@ def test_bam_compare_ZoverZ(): """ outfile = '/tmp/test_file.bg' args = "--bamfile1 {} --bamfile2 {} --outFileFormat bedgraph --scaleFactors 1:1 -o {} " \ - "--skipZeroOverZero".format(BAMFILE_A, BAMFILE_B, outfile).split() + "--skipZeroOverZero --verbose".format(BAMFILE_A, BAMFILE_B, outfile).split() bam_comp.main(args) _foo = open(outfile, 'r') resp = _foo.readlines() _foo.close() - expected = ['3R\t50\t100\t-1\n', '3R\t100\t150\t0\n', '3R\t150\t200\t-0.584963\n'] + expected = ['3R\t50\t100\t-1\n', '3R\t100\t150\t0\n', '3R\t150\t200\t-0.5849624\n'] assert f"{resp}" == f"{expected}", f"{resp} != {expected}" unlink(outfile) -def test_get_num_kept_reads(): - """ - Test the scale factor functions - """ - for fname in [BAMFILE_A, CRAMFILE_A]: - args = "--bam {} -o /tmp/test".format(fname).split() +# def test_get_num_kept_reads(): +# """ +# Test the scale factor functions +# """ +# for fname in [BAMFILE_A, CRAMFILE_A]: +# args = "--bam {} -o /tmp/test".format(fname).split() - args = bam_cov.process_args(args) - num_kept_reads, total_reads = gs.get_num_kept_reads(args, None) +# args = bam_cov.process_args(args) +# num_kept_reads, total_reads = gs.get_num_kept_reads(args, None) - # bam file 1 has 2 reads in 3R and 2 read in chr_cigar - assert num_kept_reads == 3, "num_kept_reads is wrong" - assert total_reads == 3, "num total reads is wrong" +# # bam file 1 has 2 reads in 3R and 2 read in chr_cigar +# assert num_kept_reads == 3, "num_kept_reads is wrong" +# assert total_reads == 3, "num total reads is wrong" - # ignore chr_cigar to count the total number of reads - args = "--bam {} --ignoreForNormalization chr_cigar -o /tmp/test".format(fname).split() - args = bam_cov.process_args(args) - num_kept_reads, total_reads = gs.get_num_kept_reads(args, None) +# # ignore chr_cigar to count the total number of reads +# args = "--bam {} --ignoreForNormalization chr_cigar -o /tmp/test".format(fname).split() +# args = bam_cov.process_args(args) +# num_kept_reads, total_reads = gs.get_num_kept_reads(args, None) - # the number of kept reads should be 2 as the read on chr_cigar is skipped - assert num_kept_reads == 2, "num_kept_reads is wrong ({})".format(num_kept_reads) +# # the number of kept reads should be 2 as the read on chr_cigar is skipped +# assert num_kept_reads == 2, "num_kept_reads is wrong ({})".format(num_kept_reads) - # test filtering by read direction. Only forward reads are kept - args = "--bam {} -o /tmp/test --samFlagExclude 16 --ignoreForNormalization chr_cigar ".format(fname).split() +# # test filtering by read direction. Only forward reads are kept +# args = "--bam {} -o /tmp/test --samFlagExclude 16 --ignoreForNormalization chr_cigar ".format(fname).split() - args = bam_cov.process_args(args) - num_kept_reads, total_reads = gs.get_num_kept_reads(args, None) +# args = bam_cov.process_args(args) +# num_kept_reads, total_reads = gs.get_num_kept_reads(args, None) - # only one forward read is expected in - assert num_kept_reads == 1, "num_kept_reads is wrong" +# # only one forward read is expected in +# assert num_kept_reads == 1, "num_kept_reads is wrong" def test_bam_compare_diff_files_skipnas(): @@ -218,7 +224,8 @@ def test_bam_compare_diff_files_skipnas(): is not included in the bedgraph file. """ outfile = '/tmp/test_file.bg' - for A, B in [(BAMFILE_A, BAMFILE_B), (CRAMFILE_A, CRAMFILE_B)]: + #for A, B in [(BAMFILE_A, BAMFILE_B), (CRAMFILE_A, CRAMFILE_B)]: + for A, B in [(BAMFILE_A, BAMFILE_B)]: args = "--bamfile1 {} --bamfile2 {} --scaleFactors 1:1 --operation subtract " \ "-o {} -p 1 --outFileFormat bedgraph --skipNAs".format(A, B, outfile).split() bam_comp.main(args) @@ -236,7 +243,8 @@ def test_bam_compare_extend(): Test read extension """ outfile = '/tmp/test_file.bg' - for A, B in [(BAMFILE_A, BAMFILE_B), (CRAMFILE_A, CRAMFILE_B)]: + #for A, B in [(BAMFILE_A, BAMFILE_B), (CRAMFILE_A, CRAMFILE_B)]: + for A, B in [(BAMFILE_A, BAMFILE_B)]: args = "--bamfile1 {} --bamfile2 {} --extend 100 --scaleFactors 1:1 --operation subtract " \ "-o {} -p 1 --outFileFormat bedgraph".format(A, B, outfile).split() bam_comp.main(args) @@ -254,7 +262,8 @@ def test_bam_compare_scale_factors_ratio(): Test scale factor """ outfile = '/tmp/test_file.bg' - for A, B in [(BAMFILE_A, BAMFILE_B), (CRAMFILE_A, CRAMFILE_B)]: + #for A, B in [(BAMFILE_A, BAMFILE_B), (CRAMFILE_A, CRAMFILE_B)]: + for A, B in [(BAMFILE_A, BAMFILE_B)]: args = "--bamfile1 {} --bamfile2 {} --operation ratio --ignoreForNormalization chr_cigar " \ "-o {} -p 1 --outFileFormat bedgraph".format(A, B, outfile).split() bam_comp.main(args) @@ -284,7 +293,7 @@ def test_bam_compare_scale_factors_ratio(): (scale factors [1,0.5]) (1+1)/(1+1*0.5)=1.33 """ - expected = ['3R\t0\t50\t1\n', '3R\t50\t100\t0.666667\n', '3R\t100\t150\t1.33333\n', '3R\t150\t200\t1\n'] + expected = ['3R\t0\t50\t1\n', '3R\t50\t100\t0.6666667\n', '3R\t100\t150\t1.3333334\n', '3R\t150\t200\t1\n'] assert f"{resp}" == f"{expected}", f"{resp} != {expected}" unlink(outfile) @@ -294,9 +303,10 @@ def test_bam_compare_scale_factors_subtract(): Test scale factor """ outfile = '/tmp/test_file.bg' - for A, B in [(BAMFILE_A, BAMFILE_B), (CRAMFILE_A, CRAMFILE_B)]: + #for A, B in [(BAMFILE_A, BAMFILE_B), (CRAMFILE_A, CRAMFILE_B)]: + for A, B in [(BAMFILE_A, BAMFILE_B)]: args = "--bamfile1 {} --bamfile2 {} --operation subtract --ignoreForNormalization chr_cigar " \ - "-o {} -p 1 --outFileFormat bedgraph --scaleFactorsMethod None --normalizeUsing CPM".format(A, B, outfile).split() + "-o {} -p 1 --outFileFormat bedgraph --scaleFactorsMethod None --normalizeUsing CPM --verbose".format(A, B, outfile).split() bam_comp.main(args) # The scale factors are [ 1. 0.5] because BAMFILE_B has dowble the amount of reads (4) compared to BAMFILE_A @@ -333,12 +343,13 @@ def test_bam_compare_scale_factors_subtract(): def test_bam_coverage_filter_blacklist(): """ - Test --samFlagInclude --samFlagExclude --minMappingQuality --ignoreDuplicates and --blackListFileName + Test --samFlagInclude --samFlagExclude --minMappingQuality and --blackListFileName """ outfile = '/tmp/test_file_filter.bg' - for fname in [BAMFILE_FILTER1, CRAMFILE_FILTER1]: + #for fname in [BAMFILE_FILTER1, CRAMFILE_FILTER1]: + for fname in [BAMFILE_FILTER1]: args = "--bam {} --normalizeUsing RPGC --effectiveGenomeSize 1400 -p 1 -o {} -of bedgraph --samFlagInclude 512 " \ - "--samFlagExclude 256 --minMappingQuality 5 --ignoreDuplicates " \ + "--samFlagExclude 256 --minMappingQuality 5 --verbose " \ "--blackListFileName {}".format(fname, outfile, BEDFILE_FILTER) args = args.split() bam_cov.main(args) @@ -347,15 +358,26 @@ def test_bam_coverage_filter_blacklist(): resp = _foo.readlines() _foo.close() - expected = ['3R\t0\t100\t0\n', '3R\t100\t150\t1.42338\n', - '3R\t150\t250\t4.88017\n', '3R\t250\t300\t3.05011\n', - '3R\t300\t400\t2.23675\n', '3R\t400\t450\t3.86347\n', - '3R\t450\t500\t4.06681\n', '3R\t500\t550\t2.03341\n', - '3R\t550\t600\t2.44009\n', '3R\t600\t650\t4.47349\n', - '3R\t650\t700\t3.45679\n', '3R\t700\t750\t3.66013\n', - '3R\t750\t800\t4.06681\n', '3R\t900\t950\t2.44009\n', - '3R\t950\t1000\t1.62672\n', '3R\t1000\t1050\t0.813362\n', - '3R\t1050\t1500\t0\n'] + expected = [ + "3R\t0\t100\t0\n", + "3R\t100\t150\t1.7521904\n", + "3R\t150\t200\t5.8406343\n", + "3R\t200\t250\t6.035322\n", + "3R\t250\t300\t3.6990685\n", + "3R\t300\t400\t2.5309415\n", + "3R\t400\t450\t4.283132\n", + "3R\t450\t500\t4.47782\n", + "3R\t500\t550\t2.141566\n", + "3R\t550\t600\t2.9203172\n", + "3R\t600\t650\t5.4512587\n", + "3R\t650\t750\t3.8937564\n", + "3R\t750\t800\t2.7256293\n", + "3R\t800\t900\t0\n", + "3R\t900\t950\t0.5840634\n", + "3R\t950\t1000\t1.3628147\n", + "3R\t1000\t1050\t0.77875125\n", + "3R\t1050\t1500\t0\n" + ] assert f"{resp}" == f"{expected}", f"{resp} != {expected}" unlink(outfile) @@ -366,16 +388,27 @@ def test_bam_coverage_offset1(): Test -bs 1 --Offset 1 """ outfile = '/tmp/test_offset.bw' - for fname in [BAMFILE_A, CRAMFILE_A]: - args = "--Offset 1 --bam {} -p 1 -bs 1 -o {}".format(fname, outfile) + #for fname in [BAMFILE_A, CRAMFILE_A]: + for fname in [BAMFILE_A]: + args = "--Offset 1 --bam {} -p 1 -bs 1 -o {} -of bedgraph --verbose ".format(fname, outfile) + print(args) args = args.split() bam_cov.main(args) - try: - # python 3 only - filecmp.clear_cache() - except: - pass - assert filecmp.cmp(outfile, "{}testA_offset1.bw".format(ROOT)) is True + _foo = open(outfile, 'r') + resp = _foo.readlines() + _foo.close() + + expected = [ + "3R\t0\t100\t0\n", + "3R\t100\t101\t1\n", + "3R\t101\t199\t0\n", + "3R\t199\t200\t1\n", + "chr_cigar\t0\t10\t0\n", + "chr_cigar\t10\t11\t1\n", + "chr_cigar\t11\t200\t0\n" + ] + + assert f"{resp}" == f"{expected}", f"{resp} != {expected}" unlink(outfile) @@ -384,16 +417,26 @@ def test_bam_coverage_offset1_10(): Test -bs 1 --Offset 1 10 """ outfile = '/tmp/test_offset.bw' - for fname in [BAMFILE_A, CRAMFILE_A]: - args = "--Offset 1 10 -b {} -p 1 -bs 1 -o {}".format(fname, outfile) + #for fname in [BAMFILE_A, CRAMFILE_A]: + for fname in [BAMFILE_A]: + args = "--Offset 1 10 -b {} -p 1 -bs 1 -of bedgraph -o {}".format(fname, outfile) args = args.split() bam_cov.main(args) - try: - # python 3 only - filecmp.clear_cache() - except: - pass - assert filecmp.cmp(outfile, "{}testA_offset1_10.bw".format(ROOT)) is True + _foo = open(outfile, 'r') + resp = _foo.readlines() + _foo.close() + + expected = [ + "3R\t0\t100\t0\n", + "3R\t100\t110\t1\n", + "3R\t110\t190\t0\n", + "3R\t190\t200\t1\n", + "chr_cigar\t0\t10\t0\n", + "chr_cigar\t10\t20\t1\n", + "chr_cigar\t20\t200\t0\n" + ] + + assert f"{resp}" == f"{expected}", f"{resp} != {expected}" unlink(outfile) @@ -402,16 +445,25 @@ def test_bam_coverage_offset_minus1(): Test -bs 1 --Offset -1 """ outfile = '/tmp/test_offset.bw' - for fname in [BAMFILE_A, CRAMFILE_A]: - args = "--Offset -1 -b {} -p 1 -bs 1 -o {}".format(fname, outfile) + #for fname in [BAMFILE_A, CRAMFILE_A]: + for fname in [BAMFILE_A]: + args = "--Offset -1 -b {} -p 1 -bs 1 -of bedgraph -o {}".format(fname, outfile) args = args.split() bam_cov.main(args) - try: - # python 3 only - filecmp.clear_cache() - except: - pass - assert filecmp.cmp(outfile, "{}testA_offset-1.bw".format(ROOT)) is True + _foo = open(outfile, 'r') + resp = _foo.readlines() + _foo.close() + + expected = [ + "3R\t0\t149\t0\n", + "3R\t149\t151\t1\n", + "3R\t151\t200\t0\n", + "chr_cigar\t0\t49\t0\n", + "chr_cigar\t49\t50\t1\n", + "chr_cigar\t50\t200\t0\n" + ] + + assert f"{resp}" == f"{expected}", f"{resp} != {expected}" unlink(outfile) @@ -420,16 +472,29 @@ def test_bam_coverage_offset20_minus4(): Test -bs 1 --Offset 20 -4 """ outfile = '/tmp/test_offset.bw' - for fname in [BAMFILE_A, CRAMFILE_A]: - args = "--Offset 20 -4 -b {} -p 1 -bs 1 -o {}".format(fname, outfile) + #for fname in [BAMFILE_A, CRAMFILE_A]: + for fname in [BAMFILE_A]: + args = "--Offset 20 -4 -b {} -p 1 -bs 1 -of bedgraph -o {}".format(fname, outfile) args = args.split() bam_cov.main(args) - try: - # python 3 only - filecmp.clear_cache() - except: - pass - assert filecmp.cmp(outfile, "{}testA_offset20_-4.bw".format(ROOT)) is True + _foo = open(outfile, 'r') + resp = _foo.readlines() + _foo.close() + + expected = [ + "3R\t0\t119\t0\n", + "3R\t119\t147\t1\n", + "3R\t147\t153\t0\n", + "3R\t153\t181\t1\n", + "3R\t181\t200\t0\n", + "chr_cigar\t0\t29\t0\n", + "chr_cigar\t29\t30\t1\n", + "chr_cigar\t30\t40\t0\n", + "chr_cigar\t40\t47\t1\n", + "chr_cigar\t47\t200\t0\n" + ] + + assert f"{resp}" == f"{expected}", f"{resp} != {expected}" unlink(outfile) @@ -438,25 +503,39 @@ def test_bam_compare_filter_blacklist(): Test --samFlagInclude --samFlagExclude --minMappingQuality --ignoreDuplicates and --blackListFileName """ outfile = '/tmp/test_file_filter.bg' - for A, B in [(BAMFILE_FILTER1, BAMFILE_FILTER2), (CRAMFILE_FILTER1, CRAMFILE_FILTER2)]: + #for A, B in [(BAMFILE_FILTER1, BAMFILE_FILTER2), (CRAMFILE_FILTER1, CRAMFILE_FILTER2)]: + for A, B in [(BAMFILE_FILTER1, BAMFILE_FILTER2)]: args = "-b1 {} -b2 {} -p 1 -o {} -of bedgraph --samFlagInclude 512 " \ - "--samFlagExclude 256 --minMappingQuality 5 --ignoreDuplicates " \ + "--samFlagExclude 256 --minMappingQuality 5 --verbose " \ "--blackListFileName {}".format(A, B, outfile, BEDFILE_FILTER) + print(args) args = args.split() bam_comp.main(args) _foo = open(outfile, 'r') resp = _foo.readlines() _foo.close() - expected = ['3R\t0\t100\t0\n', '3R\t100\t150\t-0.220909\n', - '3R\t150\t200\t-0.159356\n', '3R\t200\t250\t-0.0718929\n', - '3R\t250\t300\t0.135883\n', '3R\t300\t350\t0.103093\n', - '3R\t350\t400\t-0.0895516\n', '3R\t400\t450\t0.0308374\n', - '3R\t450\t500\t0.0989418\n', '3R\t500\t550\t0.207044\n', - '3R\t550\t600\t0.0198996\n', '3R\t600\t650\t-0.0957241\n', - '3R\t650\t700\t0.00968255\n', '3R\t700\t750\t-0.040642\n', - '3R\t750\t800\t-0.123451\n', '3R\t900\t950\t0.212545\n', - '3R\t950\t1000\t0.199309\n', '3R\t1000\t1050\t0.167945\n', - '3R\t1050\t1500\t0\n'] + expected = [ + "3R\t0\t100\t0\n", + "3R\t100\t150\t-0.13392672\n", + "3R\t150\t200\t-0.097087175\n", + "3R\t200\t250\t0.034750275\n", + "3R\t250\t300\t0.21310511\n", + "3R\t300\t350\t0.09064066\n", + "3R\t350\t400\t-0.10200438\n", + "3R\t400\t450\t-0.009409866\n", + "3R\t450\t500\t0.04944959\n", + "3R\t500\t550\t0.106793426\n", + "3R\t550\t600\t0.0784064\n", + "3R\t600\t650\t-0.009581514\n", + "3R\t650\t700\t-0.009331797\n", + "3R\t700\t750\t-0.13486266\n", + "3R\t750\t800\t-0.17890581\n", + "3R\t800\t900\t0\n", + "3R\t900\t950\t0.3157759\n", + "3R\t950\t1000\t0.16194484\n", + "3R\t1000\t1050\t-0.006837439\n", + "3R\t1050\t1500\t0\n" + ] assert f"{resp}" == f"{expected}", f"{resp} != {expected}" unlink(outfile) From c907ec8af0870083fbc708a46be1290b025fafd5 Mon Sep 17 00:00:00 2001 From: WardDeb Date: Wed, 16 Apr 2025 17:15:58 +0200 Subject: [PATCH 08/20] drop ignoreDups from plotter functions --- pydeeptools/deeptools/plotCoverage.py | 2 +- pydeeptools/deeptools/plotEnrichment.py | 34 ++++++++++++------------ pydeeptools/deeptools/plotFingerprint.py | 2 +- 3 files changed, 19 insertions(+), 19 deletions(-) diff --git a/pydeeptools/deeptools/plotCoverage.py b/pydeeptools/deeptools/plotCoverage.py index 4278e2b742..0b6df5b4a0 100755 --- a/pydeeptools/deeptools/plotCoverage.py +++ b/pydeeptools/deeptools/plotCoverage.py @@ -188,7 +188,7 @@ def main(args=None): blackListFileName=args.blackListFileName, extendReads=args.extendReads, minMappingQuality=args.minMappingQuality, - ignoreDuplicates=args.ignoreDuplicates, + ignoreDuplicates=False, # ignoreDuplicates is no longer supported. center_read=args.centerReads, samFlag_include=args.samFlagInclude, samFlag_exclude=args.samFlagExclude, diff --git a/pydeeptools/deeptools/plotEnrichment.py b/pydeeptools/deeptools/plotEnrichment.py index 31c11f377e..d1d0c1e75d 100755 --- a/pydeeptools/deeptools/plotEnrichment.py +++ b/pydeeptools/deeptools/plotEnrichment.py @@ -326,23 +326,23 @@ def getEnrichment_worker(arglist): continue if args.maxFragmentLength > 0 and tLen > args.maxFragmentLength: continue - if args.ignoreDuplicates: - # Assuming more or less concordant reads, use the fragment bounds, otherwise the start positions - if tLen >= 0: - s = read.pos - e = s + tLen - else: - s = read.pnext - e = s - tLen - if read.reference_id != read.next_reference_id: - e = read.pnext - if lpos is not None and lpos == read.reference_start \ - and (s, e, read.next_reference_id, read.is_reverse) in prev_pos: - continue - if lpos != read.reference_start: - prev_pos.clear() - lpos = read.reference_start - prev_pos.add((s, e, read.next_reference_id, read.is_reverse)) + # if args.ignoreDuplicates: + # # Assuming more or less concordant reads, use the fragment bounds, otherwise the start positions + # if tLen >= 0: + # s = read.pos + # e = s + tLen + # else: + # s = read.pnext + # e = s - tLen + # if read.reference_id != read.next_reference_id: + # e = read.pnext + # if lpos is not None and lpos == read.reference_start \ + # and (s, e, read.next_reference_id, read.is_reverse) in prev_pos: + # continue + # if lpos != read.reference_start: + # prev_pos.clear() + # lpos = read.reference_start + # prev_pos.add((s, e, read.next_reference_id, read.is_reverse)) total[idx] += 1 # Get blocks, possibly extending diff --git a/pydeeptools/deeptools/plotFingerprint.py b/pydeeptools/deeptools/plotFingerprint.py index fb9651a986..f0756bc2a8 100755 --- a/pydeeptools/deeptools/plotFingerprint.py +++ b/pydeeptools/deeptools/plotFingerprint.py @@ -381,7 +381,7 @@ def main(args=None): region=args.region, extendReads=args.extendReads, minMappingQuality=args.minMappingQuality, - ignoreDuplicates=args.ignoreDuplicates, + ignoreDuplicates=False, # ignoreDuplicates is no longer supported. center_read=args.centerReads, samFlag_include=args.samFlagInclude, samFlag_exclude=args.samFlagExclude, From 4a910c1fc8cf93a0f2e590dfc025ab4e70fbda5e Mon Sep 17 00:00:00 2001 From: WardDeb Date: Wed, 16 Apr 2025 17:16:21 +0200 Subject: [PATCH 09/20] test updates --- .../deeptools/test/test_multiBamSummary.py | 29 +++++++++---------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/pydeeptools/deeptools/test/test_multiBamSummary.py b/pydeeptools/deeptools/test/test_multiBamSummary.py index c1716352e2..253a583735 100644 --- a/pydeeptools/deeptools/test/test_multiBamSummary.py +++ b/pydeeptools/deeptools/test/test_multiBamSummary.py @@ -1,4 +1,4 @@ -import deeptools.multiBamSummary as mbs +import deeptools.multiBamSummary2 as mbs import numpy as np import numpy.testing as nt @@ -15,16 +15,13 @@ def test_multiBamSummary_gtf(): outfile = '/tmp/_test.npz' - for fname in [BAM, CRAM]: + #for fname in [BAM, CRAM]: + for fname in [BAM]: args = 'BED-file --BED {0} -b {1} {1} -o {2}'.format(GTF, fname, outfile).split() mbs.main(args) resp = np.load(outfile) matrix = resp['matrix'] - labels = resp['labels'] - if fname == BAM: - nt.assert_equal(labels, ['test1.bam', 'test1.bam']) - else: - nt.assert_equal(labels, ['test1.cram', 'test1.cram']) + nt.assert_allclose(matrix, np.array([[144.0, 144.0], [143.0, 143.0]])) unlink(outfile) @@ -32,25 +29,25 @@ def test_multiBamSummary_gtf(): def test_multiBamSummary_metagene(): outfile = '/tmp/_test.npz' - for fname in [BAM, CRAM]: + #for fname in [BAM, CRAM]: + for fname in [BAM]: args = 'BED-file --BED {0} -b {1} {1} -o {2} --metagene'.format(GTF, fname, outfile).split() mbs.main(args) resp = np.load(outfile) matrix = resp['matrix'] - labels = resp['labels'] - if fname == BAM: - nt.assert_equal(labels, ['test1.bam', 'test1.bam']) - else: - nt.assert_equal(labels, ['test1.cram', 'test1.cram']) - nt.assert_allclose(matrix, np.array([[25.0, 25.0], + + nt.assert_allclose(matrix, np.array([[24.0, 24.0], [31.0, 31.0]])) unlink(outfile) def test_multiBamSummary_scalingFactors(): outfile = '/tmp/test.scalingFactors.txt' - args = 'bins --binSize 50 -b {} {} --scalingFactors {}'.format(BAMA, BAMB, outfile).split() + outfile2 = '/tmp/_test.npz' + args = 'bins --binSize 50 -b {} {} --scalingFactors {} -o {} --verbose'.format(BAMA, BAMB, outfile, outfile2).split() mbs.main(args) resp = open(outfile).read().strip().split('\n') - nt.assert_equal(resp, ["sample\tscalingFactor", "testA.bam\t1.1892", "testB.bam\t0.8409"]) + assert resp == ["Sample\tscalingFactor", "testA.bam\t1.1892071", "testB.bam\t0.8408964"] + nt.assert_equal(resp, ["Sample\tscalingFactor", "testA.bam\t1.1892071", "testB.bam\t0.8408964"]) unlink(outfile) + unlink(outfile2) From 49c1fc0b8a7af074fe90e077809fa6ce63a677c9 Mon Sep 17 00:00:00 2001 From: WardDeb Date: Wed, 16 Apr 2025 17:16:33 +0200 Subject: [PATCH 10/20] properly parse bed6 (score & strand) --- src/filehandler.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/filehandler.rs b/src/filehandler.rs index b9018a3c3a..c49664db49 100644 --- a/src/filehandler.rs +++ b/src/filehandler.rs @@ -273,8 +273,8 @@ pub fn read_bedfile(bed_file: &String, metagene: bool, chroms: Vec<&String>) -> chrom: fields[0].to_string(), //chrom start: Revalue::U(start), //start end: Revalue::U(end), //end - score: ".".to_string(), //score - strand: ".".to_string(), //score + score: fields[4].to_string(), //score + strand: fields[5].to_string(), //strand name: entryname, //region name regionlength: end - start // regionlength } @@ -321,7 +321,7 @@ pub fn read_bedfile(bed_file: &String, metagene: bool, chroms: Vec<&String>) -> start: Revalue::V(starts), //start end: Revalue::V(ends), //end score: fields[4].to_string(), //score - strand: fields[5].to_string(), //score + strand: fields[5].to_string(), //strand name: entryname, //region name regionlength: length // regionlength } @@ -336,7 +336,7 @@ pub fn read_bedfile(bed_file: &String, metagene: bool, chroms: Vec<&String>) -> start: Revalue::U(start), //start end: Revalue::U(end), //end score: fields[4].to_string(), //score - strand: fields[5].to_string(), //score + strand: fields[5].to_string(), //strand name: entryname, //region name regionlength: end - start // regionlength } From 43c2c8351e1a4fd28aa5a07bf983d9598079b914 Mon Sep 17 00:00:00 2001 From: WardDeb Date: Thu, 17 Apr 2025 09:46:29 +0200 Subject: [PATCH 11/20] pytest under rust venv with maturin install --- .github/workflows/test.yml | 34 +++++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 3c964fd9f7..c91ec5ad4c 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -48,12 +48,18 @@ jobs: with: python-version: ${{ matrix.python-version }} cache: 'pip' - - name: pip install - run: | - pip install .[actions] + - name: Install Rust + uses: actions-rust-lang/setup-rust-toolchain@v1 + - name: Set up virtual environment + run: python -m venv venv + - name: Activate virtual environment + run: source venv/bin/activate + - name: Install maturin + run: pip install maturin + - name: Build with maturin + run: maturin develop --release --extras actions - name: Test deepTools - run: | - pytest -v + run: pytest -v - name: Build wheel run: | python -m build @@ -68,10 +74,16 @@ jobs: - uses: actions/setup-python@v5 with: python-version: '3.12' - cache: 'pip' - - name: pip install - run: | - pip install .[actions] + cache: 'pip' + - name: Install Rust + uses: actions-rust-lang/setup-rust-toolchain@v1 + - name: Set up virtual environment + run: python -m venv venv + - name: Activate virtual environment + run: source venv/bin/activate + - name: Install maturin + run: pip install maturin + - name: Build with maturin + run: maturin develop --release --extras actions - name: Test deepTools - run: | - pytest -v + run: pytest -v \ No newline at end of file From 4919387e391a2ab767b8fbc7da670ef9c4f9e8d4 Mon Sep 17 00:00:00 2001 From: WardDeb Date: Thu, 17 Apr 2025 09:53:59 +0200 Subject: [PATCH 12/20] drop venv from test action --- .github/workflows/test.yml | 8 -------- 1 file changed, 8 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index c91ec5ad4c..dd959ff061 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -50,10 +50,6 @@ jobs: cache: 'pip' - name: Install Rust uses: actions-rust-lang/setup-rust-toolchain@v1 - - name: Set up virtual environment - run: python -m venv venv - - name: Activate virtual environment - run: source venv/bin/activate - name: Install maturin run: pip install maturin - name: Build with maturin @@ -77,10 +73,6 @@ jobs: cache: 'pip' - name: Install Rust uses: actions-rust-lang/setup-rust-toolchain@v1 - - name: Set up virtual environment - run: python -m venv venv - - name: Activate virtual environment - run: source venv/bin/activate - name: Install maturin run: pip install maturin - name: Build with maturin From c402c36b07b26a0e725f6b4eaa9db454cd7a08c7 Mon Sep 17 00:00:00 2001 From: WardDeb Date: Thu, 17 Apr 2025 10:03:33 +0200 Subject: [PATCH 13/20] regroup test actions into build / test / wheel names --- .github/workflows/test.yml | 63 +++++++++++++++++++++++++------------- 1 file changed, 42 insertions(+), 21 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index dd959ff061..14752654af 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -50,32 +50,53 @@ jobs: cache: 'pip' - name: Install Rust uses: actions-rust-lang/setup-rust-toolchain@v1 - - name: Install maturin - run: pip install maturin - - name: Build with maturin - run: maturin develop --release --extras actions - - name: Test deepTools - run: pytest -v - - name: Build wheel + - name: build deeptools run: | + python -m venv venv + source venv/bin/activate + pip install maturin + maturin develop --release --extras actions + - name: test deeptools + run: | + source venc/bin/activate + pytest -v + - name: build wheel + run: | + source venv/bin/activate python -m build - - name: Test wheel + - name: test wheel run: | + source venv/bin/activate pip install dist/*whl build-osx: name: Test on OSX runs-on: macOS-latest + strategy: + matrix: + python-version: ['3.9','3.10', '3.11', '3.12'] steps: - - uses: actions/checkout@v4 - - uses: actions/setup-python@v5 - with: - python-version: '3.12' - cache: 'pip' - - name: Install Rust - uses: actions-rust-lang/setup-rust-toolchain@v1 - - name: Install maturin - run: pip install maturin - - name: Build with maturin - run: maturin develop --release --extras actions - - name: Test deepTools - run: pytest -v \ No newline at end of file + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + cache: 'pip' + - name: Install Rust + uses: actions-rust-lang/setup-rust-toolchain@v1 + - name: build deeptools + run: | + python -m venv venv + source venv/bin/activate + pip install maturin + maturin develop --release --extras actions + - name: test deeptools + run: | + source venc/bin/activate + pytest -v + - name: build wheel + run: | + source venv/bin/activate + python -m build + - name: test wheel + run: | + source venv/bin/activate + pip install dist/*whl \ No newline at end of file From c1a3c6e6cbf6c713a3ac8c115328ceb14a786d97 Mon Sep 17 00:00:00 2001 From: WardDeb Date: Thu, 17 Apr 2025 10:09:17 +0200 Subject: [PATCH 14/20] typo venv --- .github/workflows/test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 14752654af..f994faa306 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -58,7 +58,7 @@ jobs: maturin develop --release --extras actions - name: test deeptools run: | - source venc/bin/activate + source venv/bin/activate pytest -v - name: build wheel run: | @@ -90,7 +90,7 @@ jobs: maturin develop --release --extras actions - name: test deeptools run: | - source venc/bin/activate + source venv/bin/activate pytest -v - name: build wheel run: | From ee929a193dba2bfd9937e4638166181f99dcd3ba Mon Sep 17 00:00:00 2001 From: WardDeb Date: Thu, 17 Apr 2025 11:07:39 +0200 Subject: [PATCH 15/20] round of all floats to 2 decimal places --- src/bamcoverage.rs | 2 +- src/calc.rs | 18 ++++++++++++------ src/filehandler.rs | 7 ++++++- src/multibamsummary.rs | 5 ++++- 4 files changed, 23 insertions(+), 9 deletions(-) diff --git a/src/bamcoverage.rs b/src/bamcoverage.rs index 4e6c4505d1..905641400d 100644 --- a/src/bamcoverage.rs +++ b/src/bamcoverage.rs @@ -208,7 +208,7 @@ pub fn r_bamcoverage( (fields[0].to_string(), Value { start: fields[1].parse::().unwrap(), end: fields[2].parse::().unwrap(), - value: fields[3].parse::().unwrap() * sf + value: (fields[3].parse::().unwrap() * sf * 100.0).round() / 100.0, }) ) } diff --git a/src/calc.rs b/src/calc.rs index 97fe36393b..c1500e9834 100644 --- a/src/calc.rs +++ b/src/calc.rs @@ -134,33 +134,39 @@ pub fn calc_ratio( "log2" => { let num: f32 = (cov1 * *sf1) + *pseudocount1; let den: f32 = (cov2 * *sf2) + *pseudocount2; - return (num / den).log2(); + let fcov: f32 = (num / den).log2(); + return (fcov * 100.0).round() / 100.0; } "ratio" => { let num: f32 = (cov1 * *sf1) + *pseudocount1; let den: f32 = (cov2 * *sf2) + *pseudocount2; - return num / den; + let fcov: f32 = num / den; + return (fcov * 100.0).round() / 100.0; } "reciprocal_ratio" => { let num: f32 = (cov1 * *sf1) + *pseudocount1; let den: f32 = (cov2 * *sf2) + *pseudocount2; let ratio: f32 = num / den; if ratio >= 1.0 { - return den / num; + let fcov: f32 = den / num; + return (fcov * 100.0).round() / 100.0; } else { - return -num / den; + let fcov: f32 = -num / den; + return (fcov * 100.0).round() / 100.0; } } "subtract" => { let num: f32 = (cov1 * *sf1) + *pseudocount1; let den: f32 = (cov2 * *sf2) + *pseudocount2; - return num - den; + let fcov: f32 = num - den; + return (fcov * 100.0).round() / 100.0; } _ => { // No operation is never allowed (on the py arg level, so just default to log2) let num: f32 = (cov1 * *sf1) + *pseudocount1; let den: f32 = (cov2 * *sf2) + *pseudocount2; - return (num / den).log2(); + let fcov: f32 = (num / den).log2(); + return (fcov * 100.0).round() / 100.0; } } } diff --git a/src/filehandler.rs b/src/filehandler.rs index c49664db49..73ee47726b 100644 --- a/src/filehandler.rs +++ b/src/filehandler.rs @@ -645,7 +645,12 @@ pub fn write_matrix( region.strand, // Strand field persisted from bedfile ); writerow.push_str( - &row.iter().map(|x| (scale_regions.scale * x).to_string()).collect::>().join("\t") + &row + .iter() + .map( + |x| ((scale_regions.scale * x * 100.0).round() / 100.0).to_string() + ) + .collect::>().join("\t") ); writerow.push_str("\n"); encoder.write_all(writerow.as_bytes()).unwrap(); diff --git a/src/multibamsummary.rs b/src/multibamsummary.rs index ca1d76fd1e..8cbec0e616 100644 --- a/src/multibamsummary.rs +++ b/src/multibamsummary.rs @@ -240,7 +240,10 @@ pub fn r_mbams( .map(|x| x.split('\t').collect()) .map(|x: Vec<&str> | ( x[0].to_string(), x[1].to_string(), x[2].to_string(), x[3].parse::().unwrap() ) ) .collect(); - let counts = lines.par_iter().map(|x| x.3).collect::>(); + let counts = lines + .par_iter() + .map(|x| (x.3 * 100.0).round() / 100.0) + .collect::>(); let regions: (String, String, String) = (lines[0].0.clone(), lines[0].1.clone(), lines[0].2.clone()); _matvec.push(counts); _regions.push(regions); From b1867122cff0f33f28ab89d7e09738f6a8dd46a1 Mon Sep 17 00:00:00 2001 From: WardDeb Date: Thu, 17 Apr 2025 11:08:03 +0200 Subject: [PATCH 16/20] update test to accomodate rounded floats --- .../test/test_bamCoverage_and_bamCompare.py | 67 +++++++++---------- 1 file changed, 33 insertions(+), 34 deletions(-) diff --git a/pydeeptools/deeptools/test/test_bamCoverage_and_bamCompare.py b/pydeeptools/deeptools/test/test_bamCoverage_and_bamCompare.py index 0a084ebf2a..2c1c623875 100644 --- a/pydeeptools/deeptools/test/test_bamCoverage_and_bamCompare.py +++ b/pydeeptools/deeptools/test/test_bamCoverage_and_bamCompare.py @@ -180,7 +180,7 @@ def test_bam_compare_ZoverZ(): _foo = open(outfile, 'r') resp = _foo.readlines() _foo.close() - expected = ['3R\t50\t100\t-1\n', '3R\t100\t150\t0\n', '3R\t150\t200\t-0.5849624\n'] + expected = ['3R\t50\t100\t-1\n', '3R\t100\t150\t0\n', '3R\t150\t200\t-0.58\n'] assert f"{resp}" == f"{expected}", f"{resp} != {expected}" unlink(outfile) @@ -293,7 +293,7 @@ def test_bam_compare_scale_factors_ratio(): (scale factors [1,0.5]) (1+1)/(1+1*0.5)=1.33 """ - expected = ['3R\t0\t50\t1\n', '3R\t50\t100\t0.6666667\n', '3R\t100\t150\t1.3333334\n', '3R\t150\t200\t1\n'] + expected = ['3R\t0\t50\t1\n', '3R\t50\t100\t0.67\n', '3R\t100\t150\t1.33\n', '3R\t150\t200\t1\n'] assert f"{resp}" == f"{expected}", f"{resp} != {expected}" unlink(outfile) @@ -360,22 +360,22 @@ def test_bam_coverage_filter_blacklist(): expected = [ "3R\t0\t100\t0\n", - "3R\t100\t150\t1.7521904\n", - "3R\t150\t200\t5.8406343\n", - "3R\t200\t250\t6.035322\n", - "3R\t250\t300\t3.6990685\n", - "3R\t300\t400\t2.5309415\n", - "3R\t400\t450\t4.283132\n", - "3R\t450\t500\t4.47782\n", - "3R\t500\t550\t2.141566\n", - "3R\t550\t600\t2.9203172\n", - "3R\t600\t650\t5.4512587\n", - "3R\t650\t750\t3.8937564\n", - "3R\t750\t800\t2.7256293\n", + "3R\t100\t150\t1.75\n", + "3R\t150\t200\t5.84\n", + "3R\t200\t250\t6.04\n", + "3R\t250\t300\t3.7\n", + "3R\t300\t400\t2.53\n", + "3R\t400\t450\t4.28\n", + "3R\t450\t500\t4.48\n", + "3R\t500\t550\t2.14\n", + "3R\t550\t600\t2.92\n", + "3R\t600\t650\t5.45\n", + "3R\t650\t750\t3.89\n", + "3R\t750\t800\t2.73\n", "3R\t800\t900\t0\n", - "3R\t900\t950\t0.5840634\n", - "3R\t950\t1000\t1.3628147\n", - "3R\t1000\t1050\t0.77875125\n", + "3R\t900\t950\t0.58\n", + "3R\t950\t1000\t1.36\n", + "3R\t1000\t1050\t0.78\n", "3R\t1050\t1500\t0\n" ] @@ -517,24 +517,23 @@ def test_bam_compare_filter_blacklist(): _foo.close() expected = [ "3R\t0\t100\t0\n", - "3R\t100\t150\t-0.13392672\n", - "3R\t150\t200\t-0.097087175\n", - "3R\t200\t250\t0.034750275\n", - "3R\t250\t300\t0.21310511\n", - "3R\t300\t350\t0.09064066\n", - "3R\t350\t400\t-0.10200438\n", - "3R\t400\t450\t-0.009409866\n", - "3R\t450\t500\t0.04944959\n", - "3R\t500\t550\t0.106793426\n", - "3R\t550\t600\t0.0784064\n", - "3R\t600\t650\t-0.009581514\n", - "3R\t650\t700\t-0.009331797\n", - "3R\t700\t750\t-0.13486266\n", - "3R\t750\t800\t-0.17890581\n", + "3R\t100\t150\t-0.13\n", + "3R\t150\t200\t-0.1\n", + "3R\t200\t250\t0.03\n", + "3R\t250\t300\t0.21\n", + "3R\t300\t350\t0.09\n", + "3R\t350\t400\t-0.1\n", + "3R\t400\t450\t-0.01\n", + "3R\t450\t500\t0.05\n", + "3R\t500\t550\t0.11\n", + "3R\t550\t600\t0.08\n", + "3R\t600\t700\t-0.01\n", + "3R\t700\t750\t-0.13\n", + "3R\t750\t800\t-0.18\n", "3R\t800\t900\t0\n", - "3R\t900\t950\t0.3157759\n", - "3R\t950\t1000\t0.16194484\n", - "3R\t1000\t1050\t-0.006837439\n", + "3R\t900\t950\t0.32\n", + "3R\t950\t1000\t0.16\n", + "3R\t1000\t1050\t-0.01\n", "3R\t1050\t1500\t0\n" ] assert f"{resp}" == f"{expected}", f"{resp} != {expected}" From 75f9aef7cdf9d3f7537a1f99fdad745889c879a1 Mon Sep 17 00:00:00 2001 From: WardDeb Date: Tue, 22 Apr 2025 17:35:04 +0200 Subject: [PATCH 17/20] update computeMatrix expected test files --- pydeeptools/deeptools/test/test_heatmapper/group1.bed | 2 +- pydeeptools/deeptools/test/test_heatmapper/group2.bed | 3 +-- pydeeptools/deeptools/test/test_heatmapper/master.mat | 2 +- pydeeptools/deeptools/test/test_heatmapper/master_TES.mat | 2 +- .../deeptools/test/test_heatmapper/master_center.mat | 4 ++-- .../test/test_heatmapper/master_extend_beyond_chr_size.mat | 2 +- pydeeptools/deeptools/test/test_heatmapper/master_gtf.mat | 6 +++--- .../deeptools/test/test_heatmapper/master_metagene.mat | 6 +++--- .../deeptools/test/test_heatmapper/master_multibed.mat | 2 +- .../deeptools/test/test_heatmapper/master_nan_to_zero.mat | 2 +- .../deeptools/test/test_heatmapper/master_scale_reg.mat | 2 +- .../deeptools/test/test_heatmapper/master_unscaled.mat | 2 +- pydeeptools/deeptools/test/test_heatmapper/test2.bed | 4 +--- 13 files changed, 18 insertions(+), 21 deletions(-) diff --git a/pydeeptools/deeptools/test/test_heatmapper/group1.bed b/pydeeptools/deeptools/test/test_heatmapper/group1.bed index e7c13ff6e1..0ac76bf2c2 100644 --- a/pydeeptools/deeptools/test/test_heatmapper/group1.bed +++ b/pydeeptools/deeptools/test/test_heatmapper/group1.bed @@ -1,3 +1,3 @@ ch1 100 150 CG11023 0 + ch2 150 175 cda5 0 - -ch3 100 125 cda8 0 + +ch3 100 125 cda8 0 + \ No newline at end of file diff --git a/pydeeptools/deeptools/test/test_heatmapper/group2.bed b/pydeeptools/deeptools/test/test_heatmapper/group2.bed index c8cd07a881..d46e3934b0 100644 --- a/pydeeptools/deeptools/test/test_heatmapper/group2.bed +++ b/pydeeptools/deeptools/test/test_heatmapper/group2.bed @@ -1,4 +1,3 @@ ch1 75 125 C11023 0 + ch2 125 150 ca5 0 - -ch3 75 100 ca8 0 + - +ch3 75 100 ca8 0 + \ No newline at end of file diff --git a/pydeeptools/deeptools/test/test_heatmapper/master.mat b/pydeeptools/deeptools/test/test_heatmapper/master.mat index 189bb027be..412be8721e 100644 --- a/pydeeptools/deeptools/test/test_heatmapper/master.mat +++ b/pydeeptools/deeptools/test/test_heatmapper/master.mat @@ -1,4 +1,4 @@ -@{"verbose":false,"scale":1,"skip zeros":false,"nan after end":false,"sort using":"mean","unscaled 5 prime":[0],"body":[0],"sample_labels":["test"],"downstream":[100],"unscaled 3 prime":[0],"group_labels":["Group 1","Group 2"],"bin size":[1],"upstream":[100],"group_boundaries":[0,3,6],"sample_boundaries":[0,200],"missing data as zero":false,"ref point":["TSS"],"min threshold":null,"sort regions":"keep","proc number":1,"bin avg type":"mean","max threshold":null} +@{"verbose":false,"scale":1,"skip zeros":false,"nan after end":false,"sort using":"mean","unscaled 5 prime":[0],"body":[0],"sample_labels":["test"],"downstream":[100],"unscaled 3 prime":[0],"group_labels":["test2"],"bin size":[1],"upstream":[100],"group_boundaries":[0,6],"sample_boundaries":[0,200],"missing data as zero":false,"ref point":["TSS"],"min threshold":null,"sort regions":"keep","proc number":1,"bin avg type":"mean","max threshold":null} ch1 100 150 CG11023 0.0 + 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 ch2 150 175 cda5 0.0 - 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 ch3 100 125 cda8 0.0 + 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 diff --git a/pydeeptools/deeptools/test/test_heatmapper/master_TES.mat b/pydeeptools/deeptools/test/test_heatmapper/master_TES.mat index c8e58d5ba4..5a5bc0b941 100644 --- a/pydeeptools/deeptools/test/test_heatmapper/master_TES.mat +++ b/pydeeptools/deeptools/test/test_heatmapper/master_TES.mat @@ -1,4 +1,4 @@ -@{"verbose":false,"scale":1,"skip zeros":false,"nan after end":false,"sort using":"mean","unscaled 5 prime":[0],"body":[0],"sample_labels":["test"],"downstream":[100],"unscaled 3 prime":[0],"group_labels":["Group 1","Group 2"],"bin size":[1],"upstream":[100],"group_boundaries":[0,3,6],"sample_boundaries":[0,200],"missing data as zero":false,"ref point":["TES"],"min threshold":null,"sort regions":"keep","proc number":1,"bin avg type":"mean","max threshold":null} +@{"verbose":false,"scale":1,"skip zeros":false,"nan after end":false,"sort using":"mean","unscaled 5 prime":[0],"body":[0],"sample_labels":["test"],"downstream":[100],"unscaled 3 prime":[0],"group_labels":["test2"],"bin size":[1],"upstream":[100],"group_boundaries":[0,6],"sample_boundaries":[0,200],"missing data as zero":false,"ref point":["TES"],"min threshold":null,"sort regions":"keep","proc number":1,"bin avg type":"mean","max threshold":null} ch1 100 150 CG11023 0.0 + 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 ch2 150 175 cda5 0.0 - 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 ch3 100 125 cda8 0.0 + 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 diff --git a/pydeeptools/deeptools/test/test_heatmapper/master_center.mat b/pydeeptools/deeptools/test/test_heatmapper/master_center.mat index 54257f2534..7f497516de 100644 --- a/pydeeptools/deeptools/test/test_heatmapper/master_center.mat +++ b/pydeeptools/deeptools/test/test_heatmapper/master_center.mat @@ -1,7 +1,7 @@ -@{"verbose":false,"scale":1,"skip zeros":false,"nan after end":false,"sort using":"mean","unscaled 5 prime":[0],"body":[0],"sample_labels":["test"],"downstream":[100],"unscaled 3 prime":[0],"group_labels":["Group 1","Group 2"],"bin size":[1],"upstream":[100],"group_boundaries":[0,3,6],"sample_boundaries":[0,200],"missing data as zero":false,"ref point":["center"],"min threshold":null,"sort regions":"keep","proc number":1,"bin avg type":"mean","max threshold":null} +@{"verbose":false,"scale":1,"skip zeros":false,"nan after end":false,"sort using":"mean","unscaled 5 prime":[0],"body":[0],"sample_labels":["test"],"downstream":[100],"unscaled 3 prime":[0],"group_labels":["test2"],"bin size":[1],"upstream":[100],"group_boundaries":[0,6],"sample_boundaries":[0,200],"missing data as zero":false,"ref point":["center"],"min threshold":null,"sort regions":"keep","proc number":1,"bin avg type":"mean","max threshold":null} ch1 100 150 CG11023 0.0 + 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 ch2 150 175 cda5 0.0 - 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 ch3 100 125 cda8 0.0 + 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 ch1 75 125 C11023 0.0 + 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 ch2 125 150 ca5 0.0 - 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 -ch3 75 100 ca8 0.0 + nan nan nan nan nan nan nan nan nan nan nan nan nan 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 +ch3 75 100 ca8 0.0 + nan nan nan nan nan nan nan nan nan nan nan nan nan 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 \ No newline at end of file diff --git a/pydeeptools/deeptools/test/test_heatmapper/master_extend_beyond_chr_size.mat b/pydeeptools/deeptools/test/test_heatmapper/master_extend_beyond_chr_size.mat index d76bbaff2c..98a3926538 100644 --- a/pydeeptools/deeptools/test/test_heatmapper/master_extend_beyond_chr_size.mat +++ b/pydeeptools/deeptools/test/test_heatmapper/master_extend_beyond_chr_size.mat @@ -1,4 +1,4 @@ -@{"verbose":false,"scale":1,"skip zeros":false,"nan after end":false,"sort using":"mean","unscaled 5 prime":[0],"body":[0],"sample_labels":["test"],"downstream":[500],"unscaled 3 prime":[0],"group_labels":["group1.bed","group2.bed"],"bin size":[1],"upstream":[100],"group_boundaries":[0,3,6],"sample_boundaries":[0,600],"missing data as zero":false,"ref point":["TSS"],"min threshold":null,"sort regions":"keep","proc number":1,"bin avg type":"mean","max threshold":null} +@{"verbose":false,"scale":1,"skip zeros":false,"nan after end":false,"sort using":"mean","unscaled 5 prime":[0],"body":[0],"sample_labels":["test"],"downstream":[500],"unscaled 3 prime":[0],"group_labels":["group1","group2"],"bin size":[1],"upstream":[100],"group_boundaries":[0,3,6],"sample_boundaries":[0,600],"missing data as zero":false,"ref point":["TSS"],"min threshold":null,"sort regions":"keep","proc number":1,"bin avg type":"mean","max threshold":null} ch1 100 150 CG11023 0.0 + 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan ch2 150 175 cda5 0.0 - 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan ch3 100 125 cda8 0.0 + 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan diff --git a/pydeeptools/deeptools/test/test_heatmapper/master_gtf.mat b/pydeeptools/deeptools/test/test_heatmapper/master_gtf.mat index ca2e13540a..2fd11f9a77 100644 --- a/pydeeptools/deeptools/test/test_heatmapper/master_gtf.mat +++ b/pydeeptools/deeptools/test/test_heatmapper/master_gtf.mat @@ -1,3 +1,3 @@ -@{"verbose":false,"scale":1,"skip zeros":false,"nan after end":false,"sort using":"mean","unscaled 5 prime":[20],"body":[1000],"sample_labels":["test1.bw"],"downstream":[300],"unscaled 3 prime":[50],"group_labels":["genes"],"bin size":[10],"upstream":[500],"group_boundaries":[0,2],"sample_boundaries":[0,187],"missing data as zero":false,"ref point":[null],"min threshold":null,"sort regions":"keep","proc number":1,"bin avg type":"mean","max threshold":null} -3R 0 1000 first . + nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan 4.000000 4.000000 5.644444 7.700000 7.700000 8.610000 9.000000 9.000000 12.000000 12.000000 12.833333 14.500000 14.500000 15.366667 15.800000 15.800000 21.700001 21.700001 24.550001 31.200001 31.200001 31.920001 32.400002 32.400002 32.400002 32.400002 33.044445 35.299999 35.299999 35.355556 35.400002 35.400002 35.311111 35.299999 35.233333 34.700001 34.700001 32.655556 30.100000 30.100000 25.900001 24.700001 24.711112 24.799999 24.799999 25.920000 27.600000 27.600000 28.790000 29.299999 29.299999 28.000000 28.000000 27.866667 27.600000 27.600000 28.333334 28.700001 28.700001 29.200001 29.200001 29.050001 28.700001 28.700001 29.977778 31.000000 31.000000 27.800000 27.400000 26.422222 23.000000 23.000000 22.222222 21.600000 21.600000 25.200000 26.100000 27.544445 39.099998 39.099998 40.211110 41.599998 41.599998 40.900000 40.700001 40.130001 35.000000 35.000000 34.920000 34.799999 34.799999 35.920001 36.400002 36.400002 42.200001 42.200001 40.800001 38.000000 38.000000 29.066667 24.600000 24.600000 20.400000 20.400000 18.700000 17.000000 17.000000 16.700001 16.700001 15.450000 14.200000 14.200000 11.000000 11.000000 7.850000 4.700000 4.700000 2.500000 2.500000 1.350000 0.200000 0.200000 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan -3R 99 1100 second . - nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan 0.200000 0.200000 1.350000 2.500000 2.500000 4.700000 4.700000 7.850000 11.000000 11.000000 13.560000 14.200000 14.477778 16.700001 16.700001 16.833334 17.000000 17.000000 19.644444 20.400000 20.866666 24.600000 24.600000 29.960000 38.000000 38.000000 40.940001 42.200001 42.200001 36.400002 36.400002 35.866667 34.799999 34.799999 34.933333 35.000000 35.000000 40.700001 40.700001 40.970000 41.599998 41.599998 40.099998 39.099998 39.099998 27.400000 26.100000 25.100000 21.600000 21.600000 22.377778 23.000000 23.000000 26.911111 27.400000 28.120000 31.000000 31.000000 29.850000 28.700001 28.700001 29.100001 29.200001 29.144445 28.700001 28.700001 28.211112 27.600000 27.600000 27.911111 28.000000 28.130000 29.299999 29.299999 28.620000 27.600000 27.600000 25.640000 24.799999 24.799999 24.700001 24.700001 26.500001 30.100000 30.100000 33.166667 34.700001 34.700001 35.299999 35.299999 35.330000 35.400002 35.400002 35.344445 35.299999 35.299999 32.722223 32.400002 32.400002 32.400002 32.400002 31.733334 31.200001 31.200001 22.755556 21.700001 21.044445 15.800000 15.800000 15.020000 14.500000 14.250000 12.000000 12.000000 10.200000 9.000000 8.870000 7.700000 7.700000 5.480000 4.000000 4.000000 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +@{"upstream":[500],"downstream":[300],"body":[1000],"bin size":[10],"ref point":[null],"verbose":false,"bin avg type":"mean","missing data as zero":false,"min threshold":null,"max threshold":null,"scale":1,"skip zeros":false,"nan after end":false,"proc number":1,"sort regions":"keep","sort using":"mean","unscaled 5 prime":[20],"unscaled 3 prime":[50],"group_labels":["test"],"group_boundaries":[0,2],"sample_labels":["test1.bw"],"sample_boundaries":[0,187]} +3R 1 1000 first . + nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan 4 4 6.06 7.7 7.7 8.86 9 9.33 12 12 13.11 14.5 14.5 15.51 15.8 16.46 21.7 21.7 25.92 31.2 31.2 32.13 32.4 32.4 32.4 32.4 33.37 35.3 35.3 35.37 35.4 35.4 35.3 35.3 35.17 34.7 34.7 32.14 30.1 30.1 25.3 24.7 24.72 24.8 24.8 26.36 27.6 27.6 28.92 29.3 29.16 28 28 27.82 27.6 27.6 28.46 28.7 28.7 29.2 29.2 29.03 28.7 28.7 30.23 31 31 27.4 27.4 25.93 23 23 22.07 21.6 21.6 25.6 26.1 28.99 39.1 39.1 40.49 41.6 41.6 40.8 40.7 40.07 35 35 34.91 34.8 34.8 36.04 36.4 37.04 42.2 42.2 40.33 38 38 29.07 24.6 24.6 20.4 20.4 18.7 17 17 16.7 16.7 15.45 14.2 14.2 11 11 7.85 4.7 4.7 2.5 2.5 1.35 0.2 0.2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +3R 100 1100 second . - nan nan nan nan nan nan nan nan nan nan 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0.2 0.2 1.35 2.5 2.5 4.7 4.7 7.5 11 11 13.49 14.2 14.48 16.7 16.7 16.8 17 17 19.27 20.4 20.4 24.6 24.6 29.07 38 38 40.8 42.2 42.2 36.4 36.4 36.04 34.8 34.8 34.91 35 35 40.07 40.7 40.9 41.6 41.6 40.21 39.1 39.1 27.54 26.1 25.6 21.6 21.6 22.22 23 23 26.42 27.4 27.8 31 31 30.23 28.7 28.7 29.03 29.2 29.2 28.7 28.7 28.33 27.6 27.6 27.87 28 28 29.3 29.3 28.92 27.6 27.6 26.04 24.8 24.8 24.71 24.7 25.9 30.1 30.1 32.66 34.7 34.7 35.23 35.3 35.31 35.4 35.4 35.36 35.3 35.3 33.04 32.4 32.4 32.4 32.4 32 31.2 31.2 24.87 21.7 21.7 15.8 15.8 15.15 14.5 14.5 12 12 10.5 9 9 7.7 7.7 5.85 4 4 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan diff --git a/pydeeptools/deeptools/test/test_heatmapper/master_metagene.mat b/pydeeptools/deeptools/test/test_heatmapper/master_metagene.mat index cd587ab9b9..7938ee6182 100644 --- a/pydeeptools/deeptools/test/test_heatmapper/master_metagene.mat +++ b/pydeeptools/deeptools/test/test_heatmapper/master_metagene.mat @@ -1,3 +1,3 @@ -@{"verbose":false,"scale":1,"skip zeros":false,"nan after end":false,"sort using":"mean","unscaled 5 prime":[20],"body":[1000],"sample_labels":["test1.bw"],"downstream":[300],"unscaled 3 prime":[50],"group_labels":["genes"],"bin size":[10],"upstream":[500],"group_boundaries":[0,2],"sample_boundaries":[0,187],"missing data as zero":false,"ref point":[null],"min threshold":null,"sort regions":"keep","proc number":1,"bin avg type":"mean","max threshold":null} -3R 0,399,979 50,510,1000 first . + nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan 4.000000 4.000000 4.000000 4.000000 4.000000 4.000000 4.000000 7.700000 7.700000 7.700000 7.700000 7.700000 7.700000 7.700000 7.700000 7.700000 7.700000 7.700000 7.700000 7.700000 7.700000 7.700000 7.700000 7.700000 7.700000 7.700000 7.700000 7.700000 7.700000 24.700001 24.799999 24.799999 24.799999 24.799999 24.799999 24.799999 24.799999 24.799999 24.799999 24.799999 24.799999 24.799999 24.799999 24.799999 24.799999 24.799999 24.799999 24.799999 24.799999 24.799999 24.799999 24.799999 27.600000 27.600000 27.600000 27.600000 27.600000 27.600000 27.600000 27.600000 27.600000 27.600000 27.600000 27.600000 27.600000 27.600000 27.600000 27.600000 27.600000 27.600000 27.600000 27.600000 27.600000 27.600000 27.600000 29.299999 29.299999 29.299999 29.299999 29.299999 29.299999 29.299999 29.299999 29.299999 29.299999 29.299999 29.299999 29.299999 29.299999 29.299999 29.299999 29.299999 29.299999 29.299999 29.299999 29.299999 29.299999 28.000000 28.000000 28.000000 28.000000 28.000000 28.000000 27.960000 26.540000 17.000000 17.000000 16.700001 16.700001 15.450000 14.200000 14.200000 11.000000 11.000000 7.850000 4.700000 4.700000 2.500000 2.500000 1.350000 0.200000 0.200000 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan -3R 99,499,1079 150,610,1100 second . - nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan 0.200000 0.200000 1.350000 2.500000 2.500000 4.700000 4.700000 17.850000 31.000000 31.000000 31.000000 31.000000 31.000000 31.000000 31.000000 31.000000 28.700001 28.700001 28.700001 28.700001 28.700001 28.700001 28.700001 28.700001 28.700001 28.700001 28.700001 28.700001 28.700001 28.700001 28.700001 28.700001 28.700001 28.700001 28.700001 28.700001 28.700001 28.700001 29.200001 29.200001 29.200001 29.200001 29.200001 29.200001 29.200001 29.200001 29.200001 29.200001 29.200001 29.200001 29.200001 29.200001 29.200001 29.200001 29.200001 29.200001 29.200001 29.200001 29.200001 29.200001 28.950001 28.700001 28.700001 28.700001 28.700001 28.700001 28.700001 28.700001 28.700001 28.700001 28.700001 28.700001 28.700001 28.700001 28.700001 28.700001 28.700001 28.700001 28.700001 28.700001 28.700001 28.700001 28.700001 27.600000 27.600000 27.600000 27.600000 27.600000 27.600000 27.600000 27.600000 27.600000 27.600000 27.600000 27.600000 27.600000 27.600000 27.600000 27.600000 27.600000 27.600000 27.600000 27.600000 27.600000 27.600000 28.000000 15.800000 15.800000 15.800000 15.020000 14.500000 14.250000 12.000000 12.000000 10.200000 9.000000 8.870000 7.700000 7.700000 5.480000 4.000000 4.000000 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan +@{"upstream":[500],"downstream":[300],"body":[1000],"bin size":[10],"ref point":[null],"verbose":false,"bin avg type":"mean","missing data as zero":false,"min threshold":null,"max threshold":null,"scale":1,"skip zeros":false,"nan after end":false,"proc number":1,"sort regions":"keep","sort using":"mean","unscaled 5 prime":[20],"unscaled 3 prime":[50],"group_labels":["test"],"group_boundaries":[0,2],"sample_labels":["test1.bw"],"sample_boundaries":[0,187]} +3R 1,400,980 50,510,1000 first . + nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan 4 4 4 4 4 4 4 7.7 7.7 7.7 7.7 7.7 7.7 7.7 7.7 7.7 7.7 7.7 7.7 7.7 7.7 7.7 7.7 7.7 7.7 7.7 7.7 7.7 7.7 24.8 24.8 24.8 24.8 24.8 24.8 24.8 24.8 24.8 24.8 24.8 24.8 24.8 24.8 24.8 24.8 24.8 24.8 24.8 24.8 24.8 24.8 24.8 27.6 27.6 27.6 27.6 27.6 27.6 27.6 27.6 27.6 27.6 27.6 27.6 27.6 27.6 27.6 27.6 27.6 27.6 27.6 27.6 27.6 27.6 27.6 29.3 29.3 29.3 29.3 29.3 29.3 29.3 29.3 29.3 29.3 29.3 29.3 29.3 29.3 29.3 29.3 29.3 29.3 29.3 29.3 29.3 29.3 28 28 28 28 28 28 28 27.6 17 17 16.7 16.7 15.45 14.2 14.2 11 11 7.85 4.7 4.7 2.5 2.5 1.35 0.2 0.2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +3R 100,500,1080 150,610,1100 second . - nan nan nan nan nan nan nan nan nan nan 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0.2 0.2 1.35 2.5 2.5 4.7 4.7 31 31 31 31 31 31 31 31 31 28.7 28.7 28.7 28.7 28.7 28.7 28.7 28.7 28.7 28.7 28.7 28.7 28.7 28.7 28.7 28.7 28.7 28.7 28.7 28.7 28.7 28.7 28.7 29.2 29.2 29.2 29.2 29.2 29.2 29.2 29.2 29.2 29.2 29.2 29.2 29.2 29.2 29.2 29.2 29.2 29.2 29.2 29.2 29.2 29.2 28.7 28.7 28.7 28.7 28.7 28.7 28.7 28.7 28.7 28.7 28.7 28.7 28.7 28.7 28.7 28.7 28.7 28.7 28.7 28.7 28.7 28.7 28.7 27.6 27.6 27.6 27.6 27.6 27.6 27.6 27.6 27.6 27.6 27.6 27.6 27.6 27.6 27.6 27.6 27.6 27.6 27.6 27.6 27.6 27.6 15.8 15.8 15.8 15.15 14.5 14.5 12 12 10.5 9 9 7.7 7.7 5.85 4 4 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan diff --git a/pydeeptools/deeptools/test/test_heatmapper/master_multibed.mat b/pydeeptools/deeptools/test/test_heatmapper/master_multibed.mat index 5da8a716d5..22994f332a 100644 --- a/pydeeptools/deeptools/test/test_heatmapper/master_multibed.mat +++ b/pydeeptools/deeptools/test/test_heatmapper/master_multibed.mat @@ -1,4 +1,4 @@ -@{"verbose":false,"scale":1,"skip zeros":false,"nan after end":false,"sort using":"mean","unscaled 5 prime":[0],"body":[0],"sample_labels":["test"],"downstream":[100],"unscaled 3 prime":[0],"group_labels":["group1.bed","group2.bed"],"bin size":[1],"upstream":[100],"group_boundaries":[0,3,6],"sample_boundaries":[0,200],"missing data as zero":false,"ref point":["TSS"],"min threshold":null,"sort regions":"keep","proc number":1,"bin avg type":"mean","max threshold":null} +@{"verbose":false,"scale":1,"skip zeros":false,"nan after end":false,"sort using":"mean","unscaled 5 prime":[0],"body":[0],"sample_labels":["test"],"downstream":[100],"unscaled 3 prime":[0],"group_labels":["group1","group2"],"bin size":[1],"upstream":[100],"group_boundaries":[0,3,6],"sample_boundaries":[0,200],"missing data as zero":false,"ref point":["TSS"],"min threshold":null,"sort regions":"keep","proc number":1,"bin avg type":"mean","max threshold":null} ch1 100 150 CG11023 0.0 + 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 ch2 150 175 cda5 0.0 - 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 ch3 100 125 cda8 0.0 + 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 diff --git a/pydeeptools/deeptools/test/test_heatmapper/master_nan_to_zero.mat b/pydeeptools/deeptools/test/test_heatmapper/master_nan_to_zero.mat index 851393f90b..f8b3902e5e 100644 --- a/pydeeptools/deeptools/test/test_heatmapper/master_nan_to_zero.mat +++ b/pydeeptools/deeptools/test/test_heatmapper/master_nan_to_zero.mat @@ -1,4 +1,4 @@ -@{"verbose":false,"scale":1,"skip zeros":false,"nan after end":false,"sort using":"mean","unscaled 5 prime":[0],"body":[0],"sample_labels":["test"],"downstream":[100],"unscaled 3 prime":[0],"group_labels":["Group 1","Group 2"],"bin size":[1],"upstream":[100],"group_boundaries":[0,3,6],"sample_boundaries":[0,200],"missing data as zero":true,"ref point":["TSS"],"min threshold":null,"sort regions":"keep","proc number":1,"bin avg type":"mean","max threshold":null} +@{"verbose":false,"scale":1,"skip zeros":false,"nan after end":false,"sort using":"mean","unscaled 5 prime":[0],"body":[0],"sample_labels":["test"],"downstream":[100],"unscaled 3 prime":[0],"group_labels":["test2"],"bin size":[1],"upstream":[100],"group_boundaries":[0,6],"sample_boundaries":[0,200],"missing data as zero":true,"ref point":["TSS"],"min threshold":null,"sort regions":"keep","proc number":1,"bin avg type":"mean","max threshold":null} ch1 100 150 CG11023 0.0 + 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 ch2 150 175 cda5 0.0 - 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 ch3 100 125 cda8 0.0 + 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 diff --git a/pydeeptools/deeptools/test/test_heatmapper/master_scale_reg.mat b/pydeeptools/deeptools/test/test_heatmapper/master_scale_reg.mat index dda2e78cd4..d41ca8570b 100644 --- a/pydeeptools/deeptools/test/test_heatmapper/master_scale_reg.mat +++ b/pydeeptools/deeptools/test/test_heatmapper/master_scale_reg.mat @@ -1,4 +1,4 @@ -@{"verbose":false,"scale":1,"skip zeros":false,"nan after end":false,"sort using":"mean","unscaled 5 prime":[0],"body":[100],"sample_labels":["test"],"downstream":[100],"unscaled 3 prime":[0],"group_labels":["Group 1","Group 2"],"bin size":[1],"upstream":[100],"group_boundaries":[0,3,6],"sample_boundaries":[0,300],"missing data as zero":false,"ref point":[null],"min threshold":null,"sort regions":"keep","proc number":1,"bin avg type":"mean","max threshold":null} +@{"verbose":false,"scale":1,"skip zeros":false,"nan after end":false,"sort using":"mean","unscaled 5 prime":[0],"body":[100],"sample_labels":["test"],"downstream":[100],"unscaled 3 prime":[0],"group_labels":["test2"],"bin size":[1],"upstream":[100],"group_boundaries":[0,6],"sample_boundaries":[0,300],"missing data as zero":false,"ref point":[null],"min threshold":null,"sort regions":"keep","proc number":1,"bin avg type":"mean","max threshold":null} ch1 100 150 CG11023 0.0 + 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 ch2 150 175 cda5 0.0 - 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 ch3 100 125 cda8 0.0 + 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 diff --git a/pydeeptools/deeptools/test/test_heatmapper/master_unscaled.mat b/pydeeptools/deeptools/test/test_heatmapper/master_unscaled.mat index e0b610db12..55c58a2da0 100644 --- a/pydeeptools/deeptools/test/test_heatmapper/master_unscaled.mat +++ b/pydeeptools/deeptools/test/test_heatmapper/master_unscaled.mat @@ -1,2 +1,2 @@ -@{"verbose":false,"scale":1,"skip zeros":false,"nan after end":false,"sort using":"mean","unscaled 5 prime":[100],"body":[1000],"sample_labels":["unscaled"],"downstream":[300],"unscaled 3 prime":[50],"group_labels":["genes"],"bin size":[10],"upstream":[500],"group_boundaries":[0,1],"sample_boundaries":[0,195],"missing data as zero":false,"ref point":[null],"min threshold":null,"sort regions":"keep","proc number":1,"bin avg type":"mean","max threshold":null} +@{"verbose":false,"scale":1,"skip zeros":false,"nan after end":false,"sort using":"mean","unscaled 5 prime":[100],"body":[1000],"sample_labels":["unscaled"],"downstream":[300],"unscaled 3 prime":[50],"group_labels":["unscaled"],"bin size":[10],"upstream":[500],"group_boundaries":[0,1],"sample_boundaries":[0,195],"missing data as zero":false,"ref point":[null],"min threshold":null,"sort regions":"keep","proc number":1,"bin avg type":"mean","max threshold":null} 1 500 1650 foo . . 1.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 2.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 3.000000 4.000000 4.000000 4.000000 4.000000 4.000000 5.000000 5.000000 5.000000 5.000000 5.000000 6.000000 6.000000 6.000000 6.000000 6.000000 6.000000 6.000000 6.000000 6.000000 6.000000 6.000000 6.000000 6.000000 6.000000 6.000000 6.000000 6.000000 6.000000 6.000000 6.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 7.000000 8.000000 8.000000 9.000000 9.000000 9.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 11.000000 11.000000 11.000000 11.000000 11.000000 11.000000 11.000000 11.000000 11.000000 11.000000 diff --git a/pydeeptools/deeptools/test/test_heatmapper/test2.bed b/pydeeptools/deeptools/test/test_heatmapper/test2.bed index 9e1bcd59c6..401df74298 100644 --- a/pydeeptools/deeptools/test/test_heatmapper/test2.bed +++ b/pydeeptools/deeptools/test/test_heatmapper/test2.bed @@ -1,8 +1,6 @@ ch1 100 150 CG11023 0 + ch2 150 175 cda5 0 - ch3 100 125 cda8 0 + -#Group 1 ch1 75 125 C11023 0 + ch2 125 150 ca5 0 - -ch3 75 100 ca8 0 + -#Group 2 +ch3 75 100 ca8 0 + \ No newline at end of file From 1411bee605eced11a6271bb84a3bf26777e6f1d6 Mon Sep 17 00:00:00 2001 From: WardDeb Date: Tue, 22 Apr 2025 17:35:23 +0200 Subject: [PATCH 18/20] computematrix testing with rust version --- .../deeptools/test/test_computeMatrix.py | 3 +- pydeeptools/deeptools/test/test_heatmapper.py | 391 +++++++++--------- 2 files changed, 208 insertions(+), 186 deletions(-) diff --git a/pydeeptools/deeptools/test/test_computeMatrix.py b/pydeeptools/deeptools/test/test_computeMatrix.py index b27e8ccf7f..e7d8d886e0 100644 --- a/pydeeptools/deeptools/test/test_computeMatrix.py +++ b/pydeeptools/deeptools/test/test_computeMatrix.py @@ -1,4 +1,4 @@ -import deeptools.computeMatrix as cm +import deeptools.computeMatrix2 as cm import os.path from os import unlink @@ -20,6 +20,7 @@ def test_compute_matrix_with_reference_point_and_advance_options_1(): """ outfile = '/tmp/computematrix_1.gz' args = "reference-point --regionsFileName {} --scoreFileName {} -o {} -bs 10 --sortUsing sum --averageTypeBins sum -b 10 -a 10".format(REGIONS_IN1, BIGWIG_IN1, outfile).split() + print(' '.join(args)) cm.main(args) archieve_file_size = os.path.getsize(OUT_ARCHIEVE1) diff --git a/pydeeptools/deeptools/test/test_heatmapper.py b/pydeeptools/deeptools/test/test_heatmapper.py index 7eb9d6d78f..c787214299 100644 --- a/pydeeptools/deeptools/test/test_heatmapper.py +++ b/pydeeptools/deeptools/test/test_heatmapper.py @@ -1,10 +1,7 @@ import os import sys -import deeptools.computeMatrix -import deeptools.plotHeatmap -import deeptools.plotProfile -import deeptools.utilities +import deeptools.computeMatrix2 import json __author__ = 'Fidel' @@ -40,9 +37,28 @@ def cmpMatrices(f1, f2): sys.stderr.write("key in {} missing: {} not in {}\n".format(f2, k, p1.keys())) rv = False else: - if l1 != l2: - sys.stderr.write("lines differ:\n{}\n vs\n{}\n".format(l1, l2)) - rv = False + lix = 0 + lastrv = rv + for _i1, _i2 in zip(l1.split(), l2.split()): + if lix < 6: + try: + fi1 = float(_i1) + fi2 = float(_i2) + except ValueError: + fi1 = _i1 + fi2 = _i2 + if fi1 != fi2: + rv = False + else: + if _i1 == "nan" or _i2 == "nan": + if _i1 != _i2: + rv = False + else: + if float(_i1) != float(_i2): + rv = False + lix += 1 + if rv != lastrv: + sys.stderr.write(f"Lines {l1.split()[0:6]} and {l2.split()[0:6]} do not match.") file1.close() file2.close() return rv @@ -51,7 +67,8 @@ def cmpMatrices(f1, f2): def test_computeMatrix_reference_point(): args = "reference-point -R {0}/test2.bed -S {0}/test.bw -b 100 -a 100 " \ "--outFileName /tmp/_test.mat.gz -bs 1 -p 1".format(ROOT).split() - deeptools.computeMatrix.main(args) + print(args) + deeptools.computeMatrix2.main(args) os.system('gunzip -f /tmp/_test.mat.gz') assert cmpMatrices(ROOT + '/master.mat', '/tmp/_test.mat') is True os.remove('/tmp/_test.mat') @@ -60,7 +77,7 @@ def test_computeMatrix_reference_point(): def test_computeMatrix_reference_point_center(): args = "reference-point -R {0}/test2.bed -S {0}/test.bw -b 100 -a 100 --referencePoint center " \ "--outFileName /tmp/_test.mat.gz -bs 1 -p 1".format(ROOT).split() - deeptools.computeMatrix.main(args) + deeptools.computeMatrix2.main(args) os.system('gunzip -f /tmp/_test.mat.gz') assert cmpMatrices(ROOT + '/master_center.mat', '/tmp/_test.mat') is True os.remove('/tmp/_test.mat') @@ -69,7 +86,7 @@ def test_computeMatrix_reference_point_center(): def test_computeMatrix_reference_point_tes(): args = "reference-point -R {0}/test2.bed -S {0}/test.bw -b 100 -a 100 --referencePoint TES " \ "--outFileName /tmp/_test.mat.gz -bs 1 -p 1".format(ROOT).split() - deeptools.computeMatrix.main(args) + deeptools.computeMatrix2.main(args) os.system('gunzip -f /tmp/_test.mat.gz') assert cmpMatrices(ROOT + '/master_TES.mat', '/tmp/_test.mat') is True os.remove('/tmp/_test.mat') @@ -78,7 +95,7 @@ def test_computeMatrix_reference_point_tes(): def test_computeMatrix_reference_point_missing_data_as_zero(): args = "reference-point -R {0}/test2.bed -S {0}/test.bw -b 100 -a 100 " \ "--outFileName /tmp/_test.mat.gz -bs 1 -p 1 --missingDataAsZero".format(ROOT).split() - deeptools.computeMatrix.main(args) + deeptools.computeMatrix2.main(args) os.system('gunzip -f /tmp/_test.mat.gz') assert cmpMatrices(ROOT + '/master_nan_to_zero.mat', '/tmp/_test.mat') is True os.remove('/tmp/_test.mat') @@ -88,7 +105,7 @@ def test_computeMatrix_scale_regions(): args = "scale-regions -R {0}/test2.bed -S {0}/test.bw -b 100 -a 100 -m 100 " \ "--outFileName /tmp/_test2.mat.gz -bs 1 -p 1".format(ROOT).split() - deeptools.computeMatrix.main(args) + deeptools.computeMatrix2.main(args) os.system('gunzip -f /tmp/_test2.mat.gz') assert cmpMatrices(ROOT + '/master_scale_reg.mat', '/tmp/_test2.mat') is True os.remove('/tmp/_test2.mat') @@ -97,7 +114,7 @@ def test_computeMatrix_scale_regions(): def test_computeMatrix_multiple_bed(): args = "reference-point -R {0}/group1.bed {0}/group2.bed -S {0}/test.bw -b 100 -a 100 " \ "--outFileName /tmp/_test.mat.gz -bs 1 -p 1".format(ROOT).split() - deeptools.computeMatrix.main(args) + deeptools.computeMatrix2.main(args) os.system('gunzip -f /tmp/_test.mat.gz') assert cmpMatrices(ROOT + '/master_multibed.mat', '/tmp/_test.mat') is True os.remove('/tmp/_test.mat') @@ -106,7 +123,8 @@ def test_computeMatrix_multiple_bed(): def test_computeMatrix_region_extend_over_chr_end(): args = "reference-point -R {0}/group1.bed {0}/group2.bed -S {0}/test.bw -b 100 -a 500 " \ "--outFileName /tmp/_test.mat.gz -bs 1 -p 1".format(ROOT).split() - deeptools.computeMatrix.main(args) + print(args) + deeptools.computeMatrix2.main(args) os.system('gunzip -f /tmp/_test.mat.gz') assert cmpMatrices(ROOT + '/master_extend_beyond_chr_size.mat', '/tmp/_test.mat') is True os.remove('/tmp/_test.mat') @@ -115,7 +133,8 @@ def test_computeMatrix_region_extend_over_chr_end(): def test_computeMatrix_unscaled(): args = "scale-regions -S {0}/unscaled.bigWig -R {0}/unscaled.bed -a 300 -b 500 --unscaled5prime 100 --unscaled3prime 50 " \ "--outFileName /tmp/_test.mat.gz -bs 10 -p 1".format(ROOT).split() - deeptools.computeMatrix.main(args) + print(args) + deeptools.computeMatrix2.main(args) os.system('gunzip -f /tmp/_test.mat.gz') assert cmpMatrices(ROOT + '/master_unscaled.mat', '/tmp/_test.mat') is True os.remove('/tmp/_test.mat') @@ -124,7 +143,8 @@ def test_computeMatrix_unscaled(): def test_computeMatrix_gtf(): args = "scale-regions -S {0}../test_data/test1.bw.bw -R {0}../test_data/test.gtf -a 300 -b 500 --unscaled5prime 20 --unscaled3prime 50 " \ "--outFileName /tmp/_test_gtf.mat.gz -bs 10 -p 1".format(ROOT).split() - deeptools.computeMatrix.main(args) + print(args) + deeptools.computeMatrix2.main(args) os.system('gunzip -f /tmp/_test_gtf.mat.gz') assert cmpMatrices(ROOT + '/master_gtf.mat', '/tmp/_test_gtf.mat') is True os.remove('/tmp/_test_gtf.mat') @@ -133,177 +153,178 @@ def test_computeMatrix_gtf(): def test_computeMatrix_metagene(): args = "scale-regions -S {0}../test_data/test1.bw.bw -R {0}../test_data/test.gtf -a 300 -b 500 --unscaled5prime 20 --unscaled3prime 50 " \ "--outFileName /tmp/_test_metagene.mat.gz -bs 10 -p 1 --metagene".format(ROOT).split() - deeptools.computeMatrix.main(args) + print(args) + deeptools.computeMatrix2.main(args) os.system('gunzip -f /tmp/_test_metagene.mat.gz') assert cmpMatrices(ROOT + '/master_metagene.mat', '/tmp/_test_metagene.mat') is True os.remove('/tmp/_test_metagene.mat') -def test_chopRegions_body(): - region = [(0, 200), (300, 400), (800, 900)] - lbins, bodybins, rbins, padLeft, padRight = deeptools.heatmapper.chopRegions(region, left=0, right=0) - e_lbins = [] - e_rbins = [] - e_padLeft = 0 - e_padRight = 0 - assert f"{lbins}" == f"{e_lbins}" - assert f"{rbins}" == f"{e_rbins}" - assert f"{bodybins}" == f"{region}" - assert f"{padLeft}" == f"{e_padLeft}" - assert f"{padRight}" == f"{e_padRight}" - # Unscaled 5', 3' - lbins, bodybins, rbins, padLeft, padRight = deeptools.heatmapper.chopRegions(region, left=150, right=150) - e_lbins = [(0, 150)] - e_rbins = [(350, 400), (800, 900)] - e_bodybins = [(150, 200), (300, 350)] - e_padLeft = 0 - e_padRight = 0 - assert f"{lbins}" == f"{e_lbins}" - assert f"{rbins}" == f"{e_rbins}" - assert f"{bodybins}" == f"{e_bodybins}" - assert f"{padLeft}" == f"{e_padLeft}" - assert f"{padRight}" == f"{e_padRight}" - - -def test_chopRegions_TSS(): - region = [(0, 200), (300, 400), (800, 900)] - # + strand, 250 downstream - downstream, body, unscaled3prime, padRight, _ = deeptools.heatmapper.chopRegions(region, left=250) - e_downstream = [(0, 200), (300, 350)] - e_body = [(350, 400), (800, 900)] - e_unscaled3prime = [] - e_padRight = 0 - e_ = 0 - assert f"{downstream}" == f"{e_downstream}" - assert f"{body}" == f"{e_body}" - assert f"{unscaled3prime}" == f"{e_unscaled3prime}" - assert f"{padRight}" == f"{e_padRight}" - assert f"{_}" == f"{e_}" - # + strand, 500 downstream - downstream, body, unscaled3prime, padRight, _ = deeptools.heatmapper.chopRegions(region, left=500) - e_body = [] - e_unscaled3prime = [] - e_padRight = 100 - e_ = 0 - assert f"{downstream}" == f"{region}" - assert f"{body}" == f"{e_body}" - assert f"{unscaled3prime}" == f"{e_unscaled3prime}" - assert f"{padRight}" == f"{e_padRight}" - assert f"{_}" == f"{e_}" - # - strand, 250 downstream (labeled "upstream" due to being on the - strand) - unscaled5prime, body, upstream, _, padLeft = deeptools.heatmapper.chopRegions(region, right=250) - e_upstream = [(150, 200), (300, 400), (800, 900)] - e_body = [(0, 150)] - e_unscaled5prime = [] - e_padLeft = 0 - e_ = 0 - assert f"{upstream}" == f"{e_upstream}" - assert f"{body}" == f"{e_body}" - assert f"{unscaled5prime}" == f"{e_unscaled5prime}" - assert f"{padLeft}" == f"{e_padLeft}" - assert f"{_}" == f"{e_}" - # - strand, 500 downstream (labeled "upstream" due to being on the - strand) - unscaled5prime, body, upstream, _, padLeft = deeptools.heatmapper.chopRegions(region, right=500) - e_body = [] - e_unscaled5prime = [] - e_padLeft = 100 - e_ = 0 - assert f"{upstream}" == f"{region}" - assert f"{body}" == f"{e_body}" - assert f"{unscaled5prime}" == f"{e_unscaled5prime}" - assert f"{padLeft}" == f"{e_padLeft}" - assert f"{_}" == f"{e_}" - - -def test_chopRegions_TES(): - region = [(0, 200), (300, 400), (800, 900)] - # + strand, 250 upstream - unscaled5prime, body, upstream, _, padLeft = deeptools.heatmapper.chopRegions(region, right=250) - e_unscaled5prime = [] - e_body = [(0, 150)] - e_upstream = [(150, 200), (300, 400), (800, 900)] - e_ = 0 - e_padLeft = 0 - assert f"{unscaled5prime}" == f"{e_unscaled5prime}" - assert f"{body}" == f"{e_body}" - assert f"{upstream}" == f"{e_upstream}" - assert f"{_}" == f"{e_}" - assert f"{padLeft}" == f"{e_padLeft}" - # + strand, 500 upstream - unscaled5prime, body, upstream, _, padLeft = deeptools.heatmapper.chopRegions(region, right=500) - e_unscaled5prime = [] - e_body = [] - e_ = 0 - e_padLeft = 100 - assert f"{unscaled5prime}" == f"{e_unscaled5prime}" - assert f"{body}" == f"{e_body}" - assert f"{upstream}" == f"{region}" - assert f"{_}" == f"{e_}" - assert f"{padLeft}" == f"{e_padLeft}" - # + strand, 250 downstream (labeled "upstream" due to being on the - strand) - downstream, body, unscaled3prime, padRight, _ = deeptools.heatmapper.chopRegions(region, left=250) - e_downstream = [(0, 200), (300, 350)] - e_body = [(350, 400), (800, 900)] - e_unscaled3prime = [] - e_padRight = 0 - e_ = 0 - assert f"{downstream}" == f"{e_downstream}" - assert f"{body}" == f"{e_body}" - assert f"{unscaled3prime}" == f"{e_unscaled3prime}" - assert f"{padRight}" == f"{e_padRight}" - assert f"{_}" == f"{e_}" - # + strand, 500 downstream (labeled "upstream" due to being on the - strand) - downstream, body, unscaled3prime, padRight, _ = deeptools.heatmapper.chopRegions(region, left=500) - e_body = [] - e_unscaled3prime = [] - e_padRight = 100 - e_ = 0 - assert f"{downstream}" == f"{region}" - assert f"{body}" == f"{e_body}" - assert f"{unscaled3prime}" == f"{e_unscaled3prime}" - assert f"{padRight}" == f"{e_padRight}" - assert f"{_}" == f"{e_}" - - -def test_chopRegionsFromMiddle(): - region = [(0, 200), (300, 400), (800, 900)] - # + strand, 100 upstream/200 downstream - upstream, downstream, padLeft, padRight = deeptools.heatmapper.chopRegionsFromMiddle(region, left=100, right=200) - e_upstream = [(100, 200)] - e_downstream = [(300, 400), (800, 900)] - e_padLeft = 0 - e_padRight = 0 - assert f"{upstream}" == f"{e_upstream}" - assert f"{downstream}" == f"{e_downstream}" - assert f"{padLeft}" == f"{e_padLeft}" - assert f"{padRight}" == f"{e_padRight}" - # + strand, 250 upstream/300 downstream - upstream, downstream, padLeft, padRight = deeptools.heatmapper.chopRegionsFromMiddle(region, left=250, right=300) - e_upstream = [(0, 200)] - e_downstream = [(300, 400), (800, 900)] - e_padLeft = 50 - e_padRight = 100 - assert f"{upstream}" == f"{e_upstream}" - assert f"{downstream}" == f"{e_downstream}" - assert f"{padLeft}" == f"{e_padLeft}" - assert f"{padRight}" == f"{e_padRight}" - # - strand, 100 upstream/200 downstream - upstream, downstream, padLeft, padRight = deeptools.heatmapper.chopRegionsFromMiddle(region, left=200, right=100) - e_upstream = [(0, 200)] - e_downstream = [(300, 400)] - e_padLeft = 0 - e_padRight = 0 - assert f"{upstream}" == f"{e_upstream}" - assert f"{downstream}" == f"{e_downstream}" - assert f"{padLeft}" == f"{e_padLeft}" - assert f"{padRight}" == f"{e_padRight}" - # - strand, 250 upstream/300 downstream - upstream, downstream, padLeft, padRight = deeptools.heatmapper.chopRegionsFromMiddle(region, left=300, right=250) - e_upstream = [(0, 200)] - e_downstream = [(300, 400), (800, 900)] - e_padLeft = 100 - e_padRight = 50 - assert f"{upstream}" == f"{e_upstream}" - assert f"{downstream}" == f"{e_downstream}" - assert f"{padLeft}" == f"{e_padLeft}" - assert f"{padRight}" == f"{e_padRight}" +# def test_chopRegions_body(): +# region = [(0, 200), (300, 400), (800, 900)] +# lbins, bodybins, rbins, padLeft, padRight = deeptools.heatmapper.chopRegions(region, left=0, right=0) +# e_lbins = [] +# e_rbins = [] +# e_padLeft = 0 +# e_padRight = 0 +# assert f"{lbins}" == f"{e_lbins}" +# assert f"{rbins}" == f"{e_rbins}" +# assert f"{bodybins}" == f"{region}" +# assert f"{padLeft}" == f"{e_padLeft}" +# assert f"{padRight}" == f"{e_padRight}" +# # Unscaled 5', 3' +# lbins, bodybins, rbins, padLeft, padRight = deeptools.heatmapper.chopRegions(region, left=150, right=150) +# e_lbins = [(0, 150)] +# e_rbins = [(350, 400), (800, 900)] +# e_bodybins = [(150, 200), (300, 350)] +# e_padLeft = 0 +# e_padRight = 0 +# assert f"{lbins}" == f"{e_lbins}" +# assert f"{rbins}" == f"{e_rbins}" +# assert f"{bodybins}" == f"{e_bodybins}" +# assert f"{padLeft}" == f"{e_padLeft}" +# assert f"{padRight}" == f"{e_padRight}" + + +# def test_chopRegions_TSS(): +# region = [(0, 200), (300, 400), (800, 900)] +# # + strand, 250 downstream +# downstream, body, unscaled3prime, padRight, _ = deeptools.heatmapper.chopRegions(region, left=250) +# e_downstream = [(0, 200), (300, 350)] +# e_body = [(350, 400), (800, 900)] +# e_unscaled3prime = [] +# e_padRight = 0 +# e_ = 0 +# assert f"{downstream}" == f"{e_downstream}" +# assert f"{body}" == f"{e_body}" +# assert f"{unscaled3prime}" == f"{e_unscaled3prime}" +# assert f"{padRight}" == f"{e_padRight}" +# assert f"{_}" == f"{e_}" +# # + strand, 500 downstream +# downstream, body, unscaled3prime, padRight, _ = deeptools.heatmapper.chopRegions(region, left=500) +# e_body = [] +# e_unscaled3prime = [] +# e_padRight = 100 +# e_ = 0 +# assert f"{downstream}" == f"{region}" +# assert f"{body}" == f"{e_body}" +# assert f"{unscaled3prime}" == f"{e_unscaled3prime}" +# assert f"{padRight}" == f"{e_padRight}" +# assert f"{_}" == f"{e_}" +# # - strand, 250 downstream (labeled "upstream" due to being on the - strand) +# unscaled5prime, body, upstream, _, padLeft = deeptools.heatmapper.chopRegions(region, right=250) +# e_upstream = [(150, 200), (300, 400), (800, 900)] +# e_body = [(0, 150)] +# e_unscaled5prime = [] +# e_padLeft = 0 +# e_ = 0 +# assert f"{upstream}" == f"{e_upstream}" +# assert f"{body}" == f"{e_body}" +# assert f"{unscaled5prime}" == f"{e_unscaled5prime}" +# assert f"{padLeft}" == f"{e_padLeft}" +# assert f"{_}" == f"{e_}" +# # - strand, 500 downstream (labeled "upstream" due to being on the - strand) +# unscaled5prime, body, upstream, _, padLeft = deeptools.heatmapper.chopRegions(region, right=500) +# e_body = [] +# e_unscaled5prime = [] +# e_padLeft = 100 +# e_ = 0 +# assert f"{upstream}" == f"{region}" +# assert f"{body}" == f"{e_body}" +# assert f"{unscaled5prime}" == f"{e_unscaled5prime}" +# assert f"{padLeft}" == f"{e_padLeft}" +# assert f"{_}" == f"{e_}" + + +# def test_chopRegions_TES(): +# region = [(0, 200), (300, 400), (800, 900)] +# # + strand, 250 upstream +# unscaled5prime, body, upstream, _, padLeft = deeptools.heatmapper.chopRegions(region, right=250) +# e_unscaled5prime = [] +# e_body = [(0, 150)] +# e_upstream = [(150, 200), (300, 400), (800, 900)] +# e_ = 0 +# e_padLeft = 0 +# assert f"{unscaled5prime}" == f"{e_unscaled5prime}" +# assert f"{body}" == f"{e_body}" +# assert f"{upstream}" == f"{e_upstream}" +# assert f"{_}" == f"{e_}" +# assert f"{padLeft}" == f"{e_padLeft}" +# # + strand, 500 upstream +# unscaled5prime, body, upstream, _, padLeft = deeptools.heatmapper.chopRegions(region, right=500) +# e_unscaled5prime = [] +# e_body = [] +# e_ = 0 +# e_padLeft = 100 +# assert f"{unscaled5prime}" == f"{e_unscaled5prime}" +# assert f"{body}" == f"{e_body}" +# assert f"{upstream}" == f"{region}" +# assert f"{_}" == f"{e_}" +# assert f"{padLeft}" == f"{e_padLeft}" +# # + strand, 250 downstream (labeled "upstream" due to being on the - strand) +# downstream, body, unscaled3prime, padRight, _ = deeptools.heatmapper.chopRegions(region, left=250) +# e_downstream = [(0, 200), (300, 350)] +# e_body = [(350, 400), (800, 900)] +# e_unscaled3prime = [] +# e_padRight = 0 +# e_ = 0 +# assert f"{downstream}" == f"{e_downstream}" +# assert f"{body}" == f"{e_body}" +# assert f"{unscaled3prime}" == f"{e_unscaled3prime}" +# assert f"{padRight}" == f"{e_padRight}" +# assert f"{_}" == f"{e_}" +# # + strand, 500 downstream (labeled "upstream" due to being on the - strand) +# downstream, body, unscaled3prime, padRight, _ = deeptools.heatmapper.chopRegions(region, left=500) +# e_body = [] +# e_unscaled3prime = [] +# e_padRight = 100 +# e_ = 0 +# assert f"{downstream}" == f"{region}" +# assert f"{body}" == f"{e_body}" +# assert f"{unscaled3prime}" == f"{e_unscaled3prime}" +# assert f"{padRight}" == f"{e_padRight}" +# assert f"{_}" == f"{e_}" + + +# def test_chopRegionsFromMiddle(): +# region = [(0, 200), (300, 400), (800, 900)] +# # + strand, 100 upstream/200 downstream +# upstream, downstream, padLeft, padRight = deeptools.heatmapper.chopRegionsFromMiddle(region, left=100, right=200) +# e_upstream = [(100, 200)] +# e_downstream = [(300, 400), (800, 900)] +# e_padLeft = 0 +# e_padRight = 0 +# assert f"{upstream}" == f"{e_upstream}" +# assert f"{downstream}" == f"{e_downstream}" +# assert f"{padLeft}" == f"{e_padLeft}" +# assert f"{padRight}" == f"{e_padRight}" +# # + strand, 250 upstream/300 downstream +# upstream, downstream, padLeft, padRight = deeptools.heatmapper.chopRegionsFromMiddle(region, left=250, right=300) +# e_upstream = [(0, 200)] +# e_downstream = [(300, 400), (800, 900)] +# e_padLeft = 50 +# e_padRight = 100 +# assert f"{upstream}" == f"{e_upstream}" +# assert f"{downstream}" == f"{e_downstream}" +# assert f"{padLeft}" == f"{e_padLeft}" +# assert f"{padRight}" == f"{e_padRight}" +# # - strand, 100 upstream/200 downstream +# upstream, downstream, padLeft, padRight = deeptools.heatmapper.chopRegionsFromMiddle(region, left=200, right=100) +# e_upstream = [(0, 200)] +# e_downstream = [(300, 400)] +# e_padLeft = 0 +# e_padRight = 0 +# assert f"{upstream}" == f"{e_upstream}" +# assert f"{downstream}" == f"{e_downstream}" +# assert f"{padLeft}" == f"{e_padLeft}" +# assert f"{padRight}" == f"{e_padRight}" +# # - strand, 250 upstream/300 downstream +# upstream, downstream, padLeft, padRight = deeptools.heatmapper.chopRegionsFromMiddle(region, left=300, right=250) +# e_upstream = [(0, 200)] +# e_downstream = [(300, 400), (800, 900)] +# e_padLeft = 100 +# e_padRight = 50 +# assert f"{upstream}" == f"{e_upstream}" +# assert f"{downstream}" == f"{e_downstream}" +# assert f"{padLeft}" == f"{e_padLeft}" +# assert f"{padRight}" == f"{e_padRight}" From 1b42a044c46d0baea4cc92e3641661bba73bfea5 Mon Sep 17 00:00:00 2001 From: WardDeb Date: Tue, 22 Apr 2025 17:36:33 +0200 Subject: [PATCH 19/20] weed out last computematrix bugs in negative stranded bed/gtf --- src/covcalc.rs | 106 ++++++++++++++++++++++----------------------- src/filehandler.rs | 38 ++++++++++++---- 2 files changed, 83 insertions(+), 61 deletions(-) diff --git a/src/covcalc.rs b/src/covcalc.rs index 1e5292fdf3..9c3d2dace0 100644 --- a/src/covcalc.rs +++ b/src/covcalc.rs @@ -533,9 +533,9 @@ impl Region { let mut absstart: i64 = anchorstart as i64 - scale_regions.upstream as i64; let absstop: i64 = anchorstop as i64 + scale_regions.downstream as i64; - + println!("+ - U, absstart = {}, anchorstart = {}", absstart, anchorstart); for binix in (absstart..anchorstart as i64).step_by(scale_regions.binsize as usize) { - if binix < 0 || binix as u32 >= chromend || (binix + scale_regions.binsize as i64) as u32 >= chromend { + if binix < 0 || binix as u32 > chromend || (binix + scale_regions.binsize as i64) as u32 > chromend { leftbins.push(Bin::Conbin(0,0)); } else if scale_regions.nan_after_end && binix as u32 <= *start { leftbins.push(Bin::Conbin(0,0)); @@ -545,7 +545,7 @@ impl Region { } for binix in (anchorstop as i64..absstop).step_by(scale_regions.binsize as usize) { - if binix < 0 || binix as u32 >= chromend || (binix + scale_regions.binsize as i64) as u32 >= chromend { + if binix < 0 || binix as u32 > chromend || (binix + scale_regions.binsize as i64) as u32 > chromend { rightbins.push(Bin::Conbin(0,0)); } else if scale_regions.nan_after_end && binix as u32 >= *end { rightbins.push(Bin::Conbin(0,0)); @@ -566,6 +566,7 @@ impl Region { for bin in rightbins.into_iter() { bins.push(bin); } + } (Revalue::V(start), Revalue::V(end)) => { let exons: Vec<(u32, u32)> = start.iter().zip(end.iter()) @@ -576,7 +577,7 @@ impl Region { let mut lastanchor: u32 = anchorstop; let mut walked_bps: u32 = 0; while walked_bps < scale_regions.downstream { - if lastanchor >= chromend { + if lastanchor > chromend { rightbins.push(Bin::Conbin(0,0)); walked_bps += scale_regions.binsize; } else { @@ -597,6 +598,7 @@ impl Region { let mut leftbins: Vec = Vec::new(); let mut lastanchor: u32 = anchorstart; let mut walked_bps: u32 = 0; + while walked_bps < scale_regions.upstream { if lastanchor == 0 { leftbins.push(Bin::Conbin(0,0)); @@ -642,7 +644,7 @@ impl Region { let mut absstart: i64 = anchorstop as i64 + scale_regions.upstream as i64; let absstop: i64 = anchorstart as i64 - scale_regions.downstream as i64; - + let steps: Vec<_> = (anchorstop as i64..absstart) .step_by(scale_regions.binsize as usize) .collect(); @@ -650,12 +652,12 @@ impl Region { if binix as u32 > chromend || (binix + scale_regions.binsize as i64) as u32 > chromend { rightbins.push(Bin::Conbin(0,0)); } else if scale_regions.nan_after_end && binix as u32 >= *end { - leftbins.push(Bin::Conbin(0,0)); + rightbins.push(Bin::Conbin(0,0)); } else { - leftbins.push(Bin::Conbin(binix as u32, (binix as u32) + scale_regions.binsize)); + rightbins.push(Bin::Conbin(binix as u32, (binix as u32) + scale_regions.binsize)); } } - + println!("- - U, absstop = {}, anchorstart = {}", absstop, anchorstart); let steps: Vec<_> = (absstop..anchorstart as i64) .step_by(scale_regions.binsize as usize) .collect(); @@ -663,12 +665,12 @@ impl Region { if binix < 0 { leftbins.push(Bin::Conbin(0,0)); } else if scale_regions.nan_after_end && binix as u32 + scale_regions.binsize <= *start { - rightbins.push(Bin::Conbin(0,0)); + leftbins.push(Bin::Conbin(0,0)); } else { - rightbins.push(Bin::Conbin(binix as u32, (binix as u32) + scale_regions.binsize)); + leftbins.push(Bin::Conbin(binix as u32, (binix as u32) + scale_regions.binsize)); } } - + for bin in rightbins.into_iter() { bins.push(bin); } @@ -692,7 +694,7 @@ impl Region { let mut lastanchor: u32 = anchorstop; let mut walked_bps: u32 = 0; while walked_bps < scale_regions.upstream { - if lastanchor >= chromend { + if lastanchor > chromend { rightbins.push(Bin::Conbin(0,0)); walked_bps += scale_regions.binsize; } else { @@ -731,6 +733,7 @@ impl Region { lastanchor = retanch; } } + // Note that now we need to go the exact opposite way as for the + strand as the 'highest position' is the 'starting point'. rightbins.reverse(); for bin in rightbins.into_iter() { @@ -803,7 +806,7 @@ impl Region { } let bodystart = *start + scale_regions.unscaled5prime; let bodyend = *end - scale_regions.unscaled3prime; - + // Get the bins over the body length. These need to be scaled, so similar to deeptools < 4, linspace is used. let neededbins = (scale_regions.regionbodylength / scale_regions.binsize) as usize; // There's multiple options here: @@ -811,13 +814,11 @@ impl Region { // regionbodylength / binsize > transcriptlength <= regionbodylength -> 1 >= binsize > binsize. // transcriptlength <= regionbodylength / binsize -> index repetitions with binsize of one. let scaledbinsize = std::cmp::min(std::cmp::max((bodyend - bodystart) / neededbins as u32, 1), scale_regions.binsize); - innerbins.extend( Array1::linspace(bodystart as f32, (bodyend - scaledbinsize) as f32, neededbins) - .mapv(|x| x as u32) + .mapv(|x| x.round() as u32) .map(|x| Bin::Conbin(*x, *x + scaledbinsize)) .into_iter() .collect::>() ); - // Combine the vectors and return let mut combined_bins = Vec::new(); if scale_regions.unscaled5prime > 0 { @@ -872,11 +873,11 @@ impl Region { } } un3bins.reverse(); - + let bodystart: u32; let bodyend: u32; if scale_regions.unscaled5prime > 0 { - bodystart = un5bins.last().unwrap().get_end(); + bodystart = un5bins.last().unwrap().get_end() - 1; } else { bodystart = *start.first().unwrap(); } @@ -932,7 +933,7 @@ impl Region { } let innerbins = Array1::linspace(0 as f32, ((truebodylength)/scaledbinsize) as f32, neededbins) - .mapv(|x| x as u32) + .mapv(|x| x.round() as u32) .map(|x| binmap.get(&x).unwrap().clone()) .into_iter() .collect::>(); @@ -959,45 +960,36 @@ impl Region { let mut un5bins: Vec = Vec::new(); let mut un3bins: Vec = Vec::new(); let mut innerbins: Vec = Vec::new(); - if scale_regions.unscaled5prime > 0 { - un5bins.extend((0..scale_regions.unscaled5prime) + if scale_regions.unscaled3prime > 0 { + un3bins.extend((0..scale_regions.unscaled3prime) .step_by(scale_regions.binsize as usize) - .map(|i| Bin::Conbin(*end - i - scale_regions.binsize, *end - i)) + .map(|i| Bin::Conbin(*start + i, *start + i + scale_regions.binsize)) .collect::>()); } - if scale_regions.unscaled3prime > 0 { - un3bins.extend( (0..scale_regions.unscaled3prime) + if scale_regions.unscaled5prime > 0 { + un5bins.extend( (0..scale_regions.unscaled5prime) .step_by(scale_regions.binsize as usize) .rev() - .map(|i| Bin::Conbin(*start + scale_regions.unscaled3prime - i - scale_regions.binsize, *start + scale_regions.unscaled3prime - i)) + .map(|i| Bin::Conbin(*end - i - scale_regions.binsize, *end - i)) .collect::>() ); } let bodystart = *start + scale_regions.unscaled3prime; let bodyend = *end - scale_regions.unscaled5prime; - + // Get the bins over the body length. These need to be scaled, so similar to deeptools < 4, linspace is used. let neededbins = (scale_regions.regionbodylength / scale_regions.binsize) as usize; // There's multiple options here: // transcriptlength >= regionbodylength -> linspace // regionbodylength / binsize > transcriptlength <= regionbodylength -> 1 >= binsize > binsize. // transcriptlength <= regionbodylength / binsize -> index repetitions with binsize of one. - let mut scaledbinsize = (bodyend - bodystart)/neededbins as u32; - if scaledbinsize == 0 { - scaledbinsize = 1; - } - if scaledbinsize > scale_regions.binsize { - scaledbinsize = scale_regions.binsize; - } - + let scaledbinsize = std::cmp::min(std::cmp::max((bodyend - bodystart) / neededbins as u32, 1), scale_regions.binsize); innerbins.extend( Array1::linspace(bodystart as f32, (bodyend - scaledbinsize) as f32, neededbins) - .mapv(|x| x as u32) + .mapv(|x| x.round() as u32) .map(|x| Bin::Conbin(*x, *x + scaledbinsize)) .into_iter() .collect::>() ); - - // Reverse innerbins to go from 3' -> 5' - innerbins.reverse(); + println!(""); // Combine the vectors and return let mut combined_bins = Vec::new(); if scale_regions.unscaled3prime > 0 { @@ -1034,7 +1026,6 @@ impl Region { } } un5bins.reverse(); - if scale_regions.unscaled3prime > 0 { let mut walked_bps: u32 = 0; let mut lastanchor: u32 = start[0]; @@ -1053,18 +1044,18 @@ impl Region { lastanchor = retanch; } } - + let bodystart: u32; let bodyend: u32; - if scale_regions.unscaled3prime > 0 { - bodystart = un3bins.last().unwrap().get_end(); - } else { - bodystart = *start.first().unwrap(); - } if scale_regions.unscaled5prime > 0 { bodyend = un5bins.first().unwrap().get_start(); } else { - bodyend = *end.last().unwrap(); + bodyend = *start.first().unwrap(); + } + if scale_regions.unscaled3prime > 0 { + bodystart = un3bins.last().unwrap().get_end() - 1; + } else { + bodystart = *end.last().unwrap(); } let truebodylength = self.regionlength - scale_regions.unscaled5prime - scale_regions.unscaled3prime; let neededbins = (scale_regions.regionbodylength / scale_regions.binsize) as usize; @@ -1113,19 +1104,20 @@ impl Region { } let innerbins = Array1::linspace(0 as f32, ((truebodylength)/scaledbinsize) as f32, neededbins) - .mapv(|x| x as u32) + .mapv(|x| x.round() as u32) .map(|x| binmap.get(&x).unwrap().clone()) .into_iter() .collect::>(); + // Combine the vectors and return let mut combined_bins = Vec::new(); - if scale_regions.unscaled5prime > 0 { - combined_bins.extend(un5bins.into_iter()); - } - combined_bins.extend(innerbins.into_iter()); if scale_regions.unscaled3prime > 0 { combined_bins.extend(un3bins.into_iter()); } + combined_bins.extend(innerbins.into_iter()); + if scale_regions.unscaled5prime > 0 { + combined_bins.extend(un5bins.into_iter()); + } return combined_bins; }, _ => panic!("Start and End are not either both u32, or Vecs. This means your regions file is ill-defined."), @@ -1150,6 +1142,9 @@ fn refpoint_exonwalker(exons: &Vec<(u32, u32)>, anchor: u32, binsize: u32, chrom match anchorix { Some(i) => { // anchor sits in an exon. Check if anchor + binsize is also in same exon. + if anchor + binsize > chromend { + return (Bin::Conbin(0,0), chromend); + } if anchor + binsize <= exons[i].1 { (Bin::Conbin(anchor, anchor + binsize), anchor + binsize) } else { @@ -1217,7 +1212,7 @@ fn refpoint_exonwalker(exons: &Vec<(u32, u32)>, anchor: u32, binsize: u32, chrom None => { // our anchor doesn't sit in exons. We just return the anchor + binsize as Bin if anchor + binsize > chromend { - (Bin::Conbin(anchor, chromend), chromend) + (Bin::Conbin(0, 0), chromend) } else { (Bin::Conbin(anchor, anchor + binsize), anchor + binsize) } @@ -1227,6 +1222,11 @@ fn refpoint_exonwalker(exons: &Vec<(u32, u32)>, anchor: u32, binsize: u32, chrom // Walk backwards (upstream, towards chromosome start) match anchorix { Some(i) => { + // Run a check to see if binsize > anchor. + if anchor < binsize { + // We are at the start of the chromosome. We need to return a Conbin. + return (Bin::Conbin(0, 0), 0); + } // anchor sits in an exon. Check if anchor - binsize is also in same exon. if anchor - binsize >= exons[i].0 { (Bin::Conbin(anchor - binsize, anchor), anchor - binsize) @@ -1296,7 +1296,7 @@ fn refpoint_exonwalker(exons: &Vec<(u32, u32)>, anchor: u32, binsize: u32, chrom None => { // our anchor doesn't sit in exons. We just return the anchor - binsize as Bin if anchor < binsize { - (Bin::Conbin(0, anchor), 0) + (Bin::Conbin(0, 0), 0) } else { (Bin::Conbin(anchor - binsize, anchor), anchor - binsize) } diff --git a/src/filehandler.rs b/src/filehandler.rs index 73ee47726b..f816a01a51 100644 --- a/src/filehandler.rs +++ b/src/filehandler.rs @@ -89,8 +89,8 @@ pub fn read_gtffile(gtf_file: &String, gtfparse: &Gtfparse, chroms: Vec<&String> } let fields: Vec<&str> = line.split('\t').collect(); if fields[2].to_string() == gtfparse.exonid { - let start = fields[3].parse().unwrap(); - let end = fields[4].parse().unwrap(); + let start: u32 = fields[3].parse().unwrap(); + let end: u32 = fields[4].parse().unwrap(); let txnid = fields[8] .split(';') .find(|x| x.trim().starts_with(gtfparse.txniddesignator.as_str())) @@ -154,8 +154,8 @@ pub fn read_gtffile(gtf_file: &String, gtfparse: &Gtfparse, chroms: Vec<&String> let fields: Vec<&str> = line.split('\t').collect(); if fields[2].to_string() == gtfparse.txnid { - let start = fields[3].parse().unwrap(); - let end = fields[4].parse().unwrap(); + let start: u32 = fields[3].parse().unwrap(); + let end: u32 = fields[4].parse().unwrap(); let mut entryname = fields[8] .split(';') .find(|x| x.trim().starts_with(gtfparse.txniddesignator.as_str())) @@ -533,9 +533,24 @@ pub fn header_matrix(scale_regions: &Scalingregions, regionsizes: HashMap>().into_iter().join(",")) ); - headstr.push_str( - &format!("\"ref point\":[\"{}\"],", (0..scale_regions.bwfiles).map(|_| scale_regions.referencepoint.clone()).collect::>().into_iter().join("\",\"")) - ); + // ref point can be empty (for scale_regions, for example). + // To keep compatibility with deepTools 3 it should be written as null + let refpointstring = (0..scale_regions.bwfiles) + .map(|_| scale_regions.referencepoint.clone()) + .collect::>() + .into_iter() + .join("\",\""); + + if refpointstring.is_empty() { + headstr.push_str( + &format!("\"ref point\":[null],") + ); + } else { + headstr.push_str( + &format!("\"ref point\":[\"{}\"],", refpointstring) + ); + } + headstr.push_str( &format!("\"verbose\":{},", scale_regions.verbose) ); @@ -648,7 +663,14 @@ pub fn write_matrix( &row .iter() .map( - |x| ((scale_regions.scale * x * 100.0).round() / 100.0).to_string() + |x| { + if x.is_nan() { + // as deepTools 3 encoded nan in matrix. + "nan".to_string() + } else { + ((scale_regions.scale * x * 100.0).round() / 100.0).to_string() + } + } ) .collect::>().join("\t") ); From 9eb0c88f25d6163ce4e217c0d3c0d74748840693 Mon Sep 17 00:00:00 2001 From: WardDeb Date: Tue, 22 Apr 2025 17:38:54 +0200 Subject: [PATCH 20/20] reflect matrix changes in changelog --- docs/content/changelog.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/content/changelog.rst b/docs/content/changelog.rst index 8cc1b75624..72f7b6c2d8 100644 --- a/docs/content/changelog.rst +++ b/docs/content/changelog.rst @@ -38,7 +38,9 @@ Core - --quiet / -q option no longer exists. - bed files in computeMatrix no longer support '#' to define groups. - 'chromosome matching' i.e. chr1 <-> 1, chrMT <-> MT is no longer performed. - + - metagene mode erroneously 'nan'ed the before and after values (if they fell outside of the feature). This is fixed now. + - Rounding bahvior in matrix output only two decimals now, unscaled 5 and unscaled 3 prime are now strictly separated from the rest of the scaled region (for value calculation). + * normalization - Exactscaling is no longer an option, it's always performed. - SES option in bamCompare mode is no longer available.