diff --git a/pydeeptools/deeptools/test/test_data/filtered_out_reads.bam b/pydeeptools/deeptools/test/test_data/filtered_out_reads.bam new file mode 100644 index 0000000000..e69de29bb2 diff --git a/pydeeptools/deeptools/test/test_data/log2ratio.bed b/pydeeptools/deeptools/test/test_data/log2ratio.bed new file mode 100644 index 0000000000..45e37e23af --- /dev/null +++ b/pydeeptools/deeptools/test/test_data/log2ratio.bed @@ -0,0 +1,3 @@ +3R 0 500 2.1154773 +3R 500 1000 1.4720684 +3R 1000 1500 0 diff --git a/pydeeptools/deeptools/test/test_data/test2.bam b/pydeeptools/deeptools/test/test_data/test2.bam index c53dc3b90c..036aa29a5e 100644 Binary files a/pydeeptools/deeptools/test/test_data/test2.bam and b/pydeeptools/deeptools/test/test_data/test2.bam differ diff --git a/pydeeptools/deeptools/test/test_data/test2.bam.bai b/pydeeptools/deeptools/test/test_data/test2.bam.bai index 6c98b7fd1a..1bf8fc6541 100644 Binary files a/pydeeptools/deeptools/test/test_data/test2.bam.bai and b/pydeeptools/deeptools/test/test_data/test2.bam.bai differ diff --git a/pydeeptools/deeptools/test/test_data/test4.bam b/pydeeptools/deeptools/test/test_data/test4.bam new file mode 100644 index 0000000000..ba9f13eb1e Binary files /dev/null and b/pydeeptools/deeptools/test/test_data/test4.bam differ diff --git a/pydeeptools/deeptools/test/test_data/test4.bam.bai b/pydeeptools/deeptools/test/test_data/test4.bam.bai new file mode 100644 index 0000000000..64fcc05561 Binary files /dev/null and b/pydeeptools/deeptools/test/test_data/test4.bam.bai differ diff --git a/pydeeptools/deeptools/test/test_data/test5.bam b/pydeeptools/deeptools/test/test_data/test5.bam new file mode 100644 index 0000000000..036aa29a5e Binary files /dev/null and b/pydeeptools/deeptools/test/test_data/test5.bam differ diff --git a/pydeeptools/deeptools/test/test_data/test5.bam.bai b/pydeeptools/deeptools/test/test_data/test5.bam.bai new file mode 100644 index 0000000000..1bf8fc6541 Binary files /dev/null and b/pydeeptools/deeptools/test/test_data/test5.bam.bai differ diff --git a/pydeeptools/deeptools/test/test_data/testB.bam.bai b/pydeeptools/deeptools/test/test_data/testB.bam.bai index 3510ccc819..4d683328b6 100644 Binary files a/pydeeptools/deeptools/test/test_data/testB.bam.bai and b/pydeeptools/deeptools/test/test_data/testB.bam.bai differ diff --git a/pydeeptools/deeptools/test/test_data/testC.bam b/pydeeptools/deeptools/test/test_data/testC.bam new file mode 100644 index 0000000000..f1ac616b78 Binary files /dev/null and b/pydeeptools/deeptools/test/test_data/testC.bam differ diff --git a/pydeeptools/deeptools/test/test_data/testC.bam.bai b/pydeeptools/deeptools/test/test_data/testC.bam.bai new file mode 100644 index 0000000000..4d683328b6 Binary files /dev/null and b/pydeeptools/deeptools/test/test_data/testC.bam.bai differ diff --git a/pydeeptools/deeptools/test/test_data/test_paired2.bam b/pydeeptools/deeptools/test/test_data/test_paired2.bam index 443fc8a337..2a2d896974 100644 Binary files a/pydeeptools/deeptools/test/test_data/test_paired2.bam and b/pydeeptools/deeptools/test/test_data/test_paired2.bam differ diff --git a/pydeeptools/deeptools/test/test_r_alignmentsieve.py b/pydeeptools/deeptools/test/test_r_alignmentsieve.py new file mode 100644 index 0000000000..de031b244b --- /dev/null +++ b/pydeeptools/deeptools/test/test_r_alignmentsieve.py @@ -0,0 +1,178 @@ +import pytest +from deeptools.alignmentSieve2 import r_alignmentsieve +import os.path + + +ROOT = os.path.dirname(os.path.abspath(__file__)) + "/test_data/" +BAMFILE_A = ROOT + "test_paired.bam" +BAMFILE_B = ROOT + "test_paired2.bam" + +def test_r_alignmentsieve_basic(): + bamifile = BAMFILE_A + ofile = BAMFILE_B + nproc = 4 + filter_metrics = ROOT + "filter_metrics_basic.txt" + filtered_out_readsfile = ROOT + "filtered_out_reads.bam" + verbose = False + shift = [] + _bed = False + filter_rna_strand = "None" + min_mapping_quality = 30 + sam_flag_incl = 0 + sam_flag_excl = 0 + _blacklist = "" + min_fragment_length = 0 + max_fragment_length = 1000 + extend_reads = 0 + center_reads = False + + result = r_alignmentsieve( + bamifile, ofile, nproc, filter_metrics, filtered_out_readsfile, verbose, shift, _bed, + filter_rna_strand, min_mapping_quality, sam_flag_incl, sam_flag_excl, _blacklist, + min_fragment_length, max_fragment_length, extend_reads, center_reads + ) + + expected_content = [ + "#bamFilterReads --filterMetrics", + "#File\tReads\tRemaining Total\tInitial Reads", + "test_paired.bam\t49\t49" + ] + + with open(filter_metrics, 'r') as f: + lines = f.readlines() + + # Remove the file path prefix from the third line + lines[2] = os.path.basename(lines[2]) + + # Strip newline characters from the lines + lines = [line.strip() for line in lines] + + assert lines == expected_content + + +def test_r_alignmentsieve_with_shift(): + bamifile = BAMFILE_A + ofile = BAMFILE_B + nproc = 4 + filter_metrics = ROOT + "filter_metrics_shift.txt" + filtered_out_readsfile = ROOT + "filtered_out_reads.bam" + verbose = False + shift = [] + _bed = False + filter_rna_strand = "None" + min_mapping_quality = 30 + sam_flag_incl = 0 + sam_flag_excl = 0 + _blacklist = "" + min_fragment_length = 0 + max_fragment_length = 1000 + extend_reads = 0 + center_reads = False + + result = r_alignmentsieve( + bamifile, ofile, nproc, filter_metrics, filtered_out_readsfile, verbose, shift, _bed, + filter_rna_strand, min_mapping_quality, sam_flag_incl, sam_flag_excl, _blacklist, + min_fragment_length, max_fragment_length, extend_reads, center_reads + ) + + expected_content = [ + "#bamFilterReads --filterMetrics", + "#File\tReads\tRemaining Total\tInitial Reads", + "test_paired.bam\t49\t49" + ] + + with open(filter_metrics, 'r') as f: + lines = f.readlines() + + # Remove the file path prefix from the third line + lines[2] = os.path.basename(lines[2]) + + # Strip newline characters from the lines + lines = [line.strip() for line in lines] + + assert lines == expected_content + + +def test_r_alignmentsieve_with_filtering(): + bamifile = BAMFILE_A + ofile = BAMFILE_B + nproc = 4 + filter_metrics = ROOT + "filter_metrics_withFiltering.txt" + filtered_out_readsfile = ROOT +"filtered_out_reads.bam" + verbose = False + shift = [] + _bed = False + filter_rna_strand = "forward" + min_mapping_quality = 30 + sam_flag_incl = 0 + sam_flag_excl = 0 + _blacklist = "" + min_fragment_length = 100 + max_fragment_length = 500 + extend_reads = 0 + center_reads = False + + result = r_alignmentsieve( + bamifile, ofile, nproc, filter_metrics, filtered_out_readsfile, verbose, shift, _bed, + filter_rna_strand, min_mapping_quality, sam_flag_incl, sam_flag_excl, _blacklist, + min_fragment_length, max_fragment_length, extend_reads, center_reads + ) + + expected_content = [ + "#bamFilterReads --filterMetrics", + "#File\tReads\tRemaining Total\tInitial Reads", + "test_paired.bam\t27\t49" + ] + + with open(filter_metrics, 'r') as f: + lines = f.readlines() + + # Remove the file path prefix from the third line + lines[2] = os.path.basename(lines[2]) + + # Strip newline characters from the lines + lines = [line.strip() for line in lines] + + assert lines == expected_content + +def test_r_alignmentsieve_with_filtering_extendReads(): + bamifile = BAMFILE_A + ofile = BAMFILE_B + nproc = 4 + filter_metrics = ROOT + "filter_metrics_extendReads.txt" + filtered_out_readsfile = ROOT +"filtered_out_reads.bam" + verbose = False + shift = [] + _bed = False + filter_rna_strand = "forward" + min_mapping_quality = 30 + sam_flag_incl = 0 + sam_flag_excl = 0 + _blacklist = "" + min_fragment_length = 100 + max_fragment_length = 500 + extend_reads = 100 + center_reads = False + + result = r_alignmentsieve( + bamifile, ofile, nproc, filter_metrics, filtered_out_readsfile, verbose, shift, _bed, + filter_rna_strand, min_mapping_quality, sam_flag_incl, sam_flag_excl, _blacklist, + min_fragment_length, max_fragment_length, extend_reads, center_reads + ) + + expected_content = [ + "#bamFilterReads --filterMetrics", + "#File\tReads\tRemaining Total\tInitial Reads", + "test_paired.bam\t27\t49" + ] + + with open(filter_metrics, 'r') as f: + lines = f.readlines() + + # Remove the file path prefix from the third line + lines[2] = os.path.basename(lines[2]) + + # Strip newline characters from the lines + lines = [line.strip() for line in lines] + + assert lines == expected_content diff --git a/pydeeptools/deeptools/test/test_r_bamCompare.py b/pydeeptools/deeptools/test/test_r_bamCompare.py new file mode 100644 index 0000000000..84a3a660b6 --- /dev/null +++ b/pydeeptools/deeptools/test/test_r_bamCompare.py @@ -0,0 +1,88 @@ +import pytest +from deeptools.bamCompare2 import r_bamcompare +import os.path +import filecmp +from os import unlink + +ROOT = os.path.dirname(os.path.abspath(__file__)) + "/test_data/" +BAMFILE_A = ROOT + "test1.bam" +BAMFILE_B = ROOT + "test2.bam" + + +def test_r_bamcompare(): + bamifile1 = BAMFILE_A + bamifile2 = BAMFILE_B + ofile = ROOT + "r_bamcompare_output.bedgraph" + ofiletype = "bedgraph" + norm = "None" + effective_genome_size = 0 + scalefactorsmethod = "None" + operation = "ratio" + pseudocount = 0 + # filtering options + ignoreduplicates = False + minmappingquality = 0 + samflaginclude = 0 + samflagexclude = 0 + minfraglen = 0 + maxfraglen = 0 + nproc = 1 + _ignorechr = [] + binsize = 500 + regions = [] + verbose = False + + # Call the Rust function + r_bamcompare( + bamifile1, bamifile2, ofile, ofiletype, norm, effective_genome_size, scalefactorsmethod, operation, + pseudocount, ignoreduplicates, minmappingquality, samflaginclude, samflagexclude, minfraglen, maxfraglen, nproc, + _ignorechr, binsize, regions, verbose + ) + # Add assertions to verify the expected behavior + expected = ['3R\t0\t500\t4.571429\n', + '3R\t500\t1000\t2.8333333\n', + '3R\t1000\t1500\t1\n',] + _foo = open(ofile, 'r') + resp = _foo.readlines() + _foo.close() + assert f"{resp}" == f"{expected}", f"{resp} != {expected}" + unlink(ofile) + +def test_r_bamcompare_RPKM(): + bamifile1 = BAMFILE_A + bamifile2 = BAMFILE_B + ofile = ROOT + "r_bamcompare_output.bedgraph" + ofiletype = "bedgraph" + norm = "RPKM" + effective_genome_size = 0 + scalefactorsmethod = "None" + operation = "ratio" + pseudocount = 0 + # filtering options + ignoreduplicates = False + minmappingquality = 0 + samflaginclude = 0 + samflagexclude = 0 + minfraglen = 0 + maxfraglen = 0 + nproc = 1 + _ignorechr = [] + binsize = 500 + regions = [] + verbose = False + + # Call the Rust function + r_bamcompare( + bamifile1, bamifile2, ofile, ofiletype, norm, effective_genome_size, scalefactorsmethod, operation, + pseudocount, ignoreduplicates, minmappingquality, samflaginclude, samflagexclude, minfraglen, maxfraglen, nproc, + _ignorechr, binsize, regions, verbose + ) + # Add assertions to verify the expected behavior + expected = ['3R\t0\t500\t4.571429\n', + '3R\t500\t1000\t2.8333333\n', + '3R\t1000\t1500\t1\n',] + _foo = open(ofile, 'r') + resp = _foo.readlines() + _foo.close() + assert f"{resp}" == f"{expected}", f"{resp} != {expected}" + unlink(ofile) \ No newline at end of file diff --git a/pydeeptools/deeptools/test/test_r_bamCoverage.py b/pydeeptools/deeptools/test/test_r_bamCoverage.py new file mode 100644 index 0000000000..5d50a67ef0 --- /dev/null +++ b/pydeeptools/deeptools/test/test_r_bamCoverage.py @@ -0,0 +1,182 @@ +import pytest +from deeptools.bamCoverage2 import r_bamcoverage +import os.path +import filecmp +from os import unlink + +ROOT = os.path.dirname(os.path.abspath(__file__)) + "/test_data/" +BAMFILE_A = ROOT + "testA.bam" +BAMFILE_B = ROOT + "testB.bam" + + +def test_r_bamcoverage(): + bamifile = BAMFILE_A + ofile = ROOT + "output.bedgraph" + ofiletype = "bedgraph" + norm = "None" + effectivegenomesize = 0 + scalefactor = 1.0 + mnase = False + offset = [1, -1] # Adjusted to a regular Python list + extendreads = 0 + centerreads = False + filterrnastrand = "none" + blacklist = "" + ignorechr = [] + skipnoncovregions = False + smoothlength = 0 + binsize = 50 + ignoreduplicates = False + minmappingquality = 0 + samflaginclude = 0 + samflagexclude = 0 + minfraglen = 0 + maxfraglen = 0 + nproc = 1 + regions = [] + verbose = False + + + # Call the Rust function + r_bamcoverage( + bamifile, ofile, ofiletype, norm, effectivegenomesize, scalefactor, mnase, offset, extendreads, + centerreads, filterrnastrand, blacklist, ignorechr, skipnoncovregions, smoothlength, + binsize, ignoreduplicates, minmappingquality, samflaginclude, samflagexclude, minfraglen, maxfraglen, + nproc, regions, verbose + ) + # Add assertions to verify the expected behavior + expected = ['3R\t0\t100\t0\n', '3R\t100\t200\t1\n', 'chr_cigar\t0\t50\t1\n', 'chr_cigar\t50\t200\t0\n'] + _foo = open(ofile, 'r') + resp = _foo.readlines() + _foo.close() + assert f"{resp}" == f"{expected}", f"{resp} != {expected}" + unlink(ofile) + +def test_r_bamcoverage_RPKM(): + bamifile = BAMFILE_A + ofile = ROOT + "output.bedgraph" + ofiletype = "bedgraph" + norm = "RPKM" + effectivegenomesize = 0 + scalefactor = 1.0 + mnase = False + offset = [1, -1] # Adjusted to a regular Python list + extendreads = 0 + centerreads = False + filterrnastrand = "none" + blacklist = "" + ignorechr = [] + skipnoncovregions = False + smoothlength = 0 + binsize = 50 + ignoreduplicates = False + minmappingquality = 0 + samflaginclude = 0 + samflagexclude = 0 + minfraglen = 0 + maxfraglen = 0 + nproc = 1 + regions = [] + verbose = False + + + # Call the Rust function + r_bamcoverage( + bamifile, ofile, ofiletype, norm, effectivegenomesize, scalefactor, mnase, offset, extendreads, + centerreads, filterrnastrand, blacklist, ignorechr, skipnoncovregions, smoothlength, + binsize, ignoreduplicates, minmappingquality, samflaginclude, samflagexclude, minfraglen, maxfraglen, + nproc, regions, verbose + ) + # Add assertions to verify the expected behavior + expected = ['3R\t0\t100\t0\n', '3R\t100\t200\t6666666.5\n', 'chr_cigar\t0\t50\t6666666.5\n', 'chr_cigar\t50\t200\t0\n'] + _foo = open(ofile, 'r') + resp = _foo.readlines() + _foo.close() + assert f"{resp}" == f"{expected}", f"{resp} != {expected}" + unlink(ofile) + +def test_r_bamcoverage_RPKM_with_effectivegenomesize(): + bamifile = BAMFILE_B + ofile = ROOT + "testB.bedgraph" + ofiletype = "bedgraph" + norm = "RPKM" + effectivegenomesize = 142573017 + scalefactor = 1.0 + mnase = False + offset = [1, -1] # Adjusted to a regular Python list + extendreads = 0 + centerreads = False + filterrnastrand = "none" + blacklist = "" + ignorechr = [] + skipnoncovregions = False + smoothlength = 0 + binsize = 50 + ignoreduplicates = False + minmappingquality = 0 + samflaginclude = 0 + samflagexclude = 0 + minfraglen = 0 + maxfraglen = 0 + nproc = 1 + regions = [] + verbose = False + + + # Call the Rust function + r_bamcoverage( + bamifile, ofile, ofiletype, norm, effectivegenomesize, scalefactor, mnase, offset, extendreads, + centerreads, filterrnastrand, blacklist, ignorechr, skipnoncovregions, smoothlength, + binsize, ignoreduplicates, minmappingquality, samflaginclude, samflagexclude, minfraglen, maxfraglen, + nproc, regions, verbose + ) + # Add assertions to verify the expected behavior + expected = ['3R\t0\t50\t0\n', '3R\t50\t150\t5000000\n', '3R\t150\t200\t10000000\n'] + _foo = open(ofile, 'r') + resp = _foo.readlines() + _foo.close() + assert f"{resp}" == f"{expected}", f"{resp} != {expected}" + unlink(ofile) + +def test_r_bamcoverage_RPKM_with_effectivegenomesize_and_scalefactor(): + bamifile = BAMFILE_B + ofile = ROOT + "testB.bedgraph" + ofiletype = "bedgraph" + norm = "RPKM" + effectivegenomesize = 142573017 + scalefactor = 2.0 + mnase = False + offset = [1, -1] # Adjusted to a regular Python list + extendreads = 0 + centerreads = False + filterrnastrand = "none" + blacklist = "" + ignorechr = [] + skipnoncovregions = False + smoothlength = 0 + binsize = 50 + ignoreduplicates = False + minmappingquality = 0 + samflaginclude = 0 + samflagexclude = 0 + minfraglen = 0 + maxfraglen = 0 + nproc = 1 + regions = [] + verbose = False + + + # Call the Rust function + r_bamcoverage( + bamifile, ofile, ofiletype, norm, effectivegenomesize, scalefactor, mnase, offset, extendreads, + centerreads, filterrnastrand, blacklist, ignorechr, skipnoncovregions, smoothlength, + binsize, ignoreduplicates, minmappingquality, samflaginclude, samflagexclude, minfraglen, maxfraglen, + nproc, regions, verbose + ) + # Add assertions to verify the expected behavior + expected = ['3R\t0\t50\t0\n', '3R\t50\t150\t2\n', '3R\t150\t200\t4\n'] + _foo = open(ofile, 'r') + resp = _foo.readlines() + _foo.close() + assert f"{resp}" == f"{expected}", f"{resp} != {expected}" + unlink(ofile) diff --git a/pydeeptools/deeptools/test/test_r_computeMatrix.py b/pydeeptools/deeptools/test/test_r_computeMatrix.py new file mode 100644 index 0000000000..a1dddb64f0 --- /dev/null +++ b/pydeeptools/deeptools/test/test_r_computeMatrix.py @@ -0,0 +1,109 @@ +import pytest +from deeptools.computeMatrix2 import r_computematrix # Adjust the import to your actual module +import os.path +import gzip +import hashlib + +root = os.path.dirname(os.path.abspath(__file__)) + "/test_data/" +matrix = root + "computeMatrixOperations.mat.gz" +bed = root + "computeMatrixOperations.bed" +rbindMatrix1 = root + "somegenes.txt.gz" +rbindMatrix2 = root + "othergenes.txt.gz" +bigwig = root + "testA.bw" +outnpz = root + "output.mat.npz.gz" + +def test_r_computematrix_referencePoint(): + mode = "reference-point" + regionlis = [bed] + bwlis = [bigwig] + sampleslabel = ["sample1"] + upstream = 1000 + downstream = 1000 + unscaled5prime = 0 + unscaled3prime = 0 + regionbodylength = 0 + binsize = 50 + missingdatazero = False + metagene = False + txnid = "" + exonid = "" + txniddesignator = "" + scale = 1.0 + nanafterend = False + skipzeros = False + minthresh = 0.0 + maxthresh = 0.0 + averagetypebins = "mean" + sortregions = "keep" + sortusing = "mean" + ortusingsamples = [] + referencepoint = "TSS" + nproc = 1 + verbose = False + ofile = outnpz + + result = r_computematrix( + mode, regionlis, bwlis, sampleslabel, upstream, downstream, unscaled5prime, unscaled3prime, + regionbodylength, binsize, missingdatazero, metagene, txnid, exonid, txniddesignator, scale, + nanafterend, skipzeros, minthresh, maxthresh, averagetypebins, sortregions, sortusing, + ortusingsamples, referencepoint, nproc, verbose, ofile + ) + + + + with gzip.open(ofile, 'rb') as f: + file_content = f.read() + h = hashlib.md5(file_content).hexdigest() + + expectedh = '4f1a2ce422d5b74fb6b75a81916929db' + assert h == expectedh + + os.remove(ofile) + +def test_r_computematrix_scale(): + mode = "scale-regions" + regionlis = [bed] + bwlis = [bigwig] + sampleslabel = ["sample1"] + upstream = 1000 + downstream = 1000 + unscaled5prime = 0 + unscaled3prime = 0 + regionbodylength = 0 + binsize = 50 + missingdatazero = False + metagene = False + txnid = "" + exonid = "" + txniddesignator = "" + scale = 1.0 + nanafterend = False + skipzeros = False + minthresh = 0.0 + maxthresh = 0.0 + averagetypebins = "mean" + sortregions = "keep" + sortusing = "mean" + ortusingsamples = [] + referencepoint = "TSS" + nproc = 1 + verbose = False + ofile = outnpz + + result = r_computematrix( + mode, regionlis, bwlis, sampleslabel, upstream, downstream, unscaled5prime, unscaled3prime, + regionbodylength, binsize, missingdatazero, metagene, txnid, exonid, txniddesignator, scale, + nanafterend, skipzeros, minthresh, maxthresh, averagetypebins, sortregions, sortusing, + ortusingsamples, referencepoint, nproc, verbose, ofile + ) + + with gzip.open(ofile, 'rb') as f: + file_content = f.read() + h = hashlib.md5(file_content).hexdigest() + + + expectedh = '4f1a2ce422d5b74fb6b75a81916929db' + assert h == expectedh + + os.remove(ofile) +