- Convert Python 2 code to Python 3 with 2to3

dridk · dridk · commit a69e691ac974 · 2022-01-28T20:08:45.000+01:00
- Remove all references to Python 2
diff --git a/setup.py b/setup.py
@@ -29,7 +29,7 @@
     extras['ext_modules'] = [Extension("vcf.cparse", ["vcf/cparse.pyx"])]
 
 setup(
-    name='PyVCF',
+    name='PyVCF3',
     packages=['vcf', 'vcf.test'],
     scripts=['scripts/vcf_melt', 'scripts/vcf_filter.py',
              'scripts/vcf_sample_filter.py'],
@@ -49,7 +49,7 @@
             'snp-only = vcf.filters:SnpOnly',
         ]
     },
-    url='https://github.com/jamescasbon/PyVCF',
+    url='https://github.com/dridk/PyVCF3',
     version=VERSION,
     classifiers = [
         'Development Status :: 4 - Beta',
@@ -60,8 +60,6 @@
         'Operating System :: OS Independent',
         'Programming Language :: Cython',
         'Programming Language :: Python',
-        'Programming Language :: Python :: 2',
-        'Programming Language :: Python :: 2.7',
         'Programming Language :: Python :: 3',
         'Programming Language :: Python :: 3.4',
         'Programming Language :: Python :: 3.5',
@@ -71,7 +69,6 @@
         'Topic :: Scientific/Engineering :: Bio-Informatics',
       ],
     keywords='bioinformatics',
-    use_2to3=True,
     include_package_data=True,
     package_data = {
         '': ['*.vcf', '*.gz', '*.tbi'],
diff --git a/tox.ini b/tox.ini
@@ -4,7 +4,7 @@
 # and then run "tox" from this directory.
 
 [tox]
-envlist = py27, py34, py35, py36, pypy, pypy3
+envlist =  py34, py35, py36, pypy, pypy3
 
 [testenv]
 deps =
diff --git a/vcf/__init__.py b/vcf/__init__.py
@@ -12,4 +12,4 @@
 from vcf.parser import RESERVED_INFO, RESERVED_FORMAT
 from vcf.sample_filter import SampleFilter
 
-VERSION = '0.6.8'
+VERSION = '1.0.0'
diff --git a/vcf/model.py b/vcf/model.py
@@ -362,7 +362,7 @@ def heterozygosity(self):
         If there are i alleles with frequency p_i, H=1-sum_i(p_i^2)
         """
         allele_freqs = [1-sum(self.aaf)] + self.aaf
-        return 1 - sum(map(lambda x: x**2, allele_freqs))
+        return 1 - sum([x**2 for x in allele_freqs])
 
     def get_hom_refs(self):
         """ The list of hom ref genotypes"""
@@ -558,9 +558,8 @@ def is_filtered(self):
             return True
 
 
-class _AltRecord(object):
+class _AltRecord(object, metaclass=ABCMeta):
     '''An alternative allele record: either replacement string, SV placeholder, or breakend'''
-    __metaclass__ = ABCMeta
 
     def __init__(self, type, **kwargs):
         super(_AltRecord, self).__init__(**kwargs)
@@ -596,7 +595,7 @@ def __len__(self):
         return len(self.sequence)
 
     def __eq__(self, other):
-        if isinstance(other, basestring):
+        if isinstance(other, str):
             return self.sequence == other
         elif not isinstance(other, self.__class__):
             return False
diff --git a/vcf/parser.py b/vcf/parser.py
@@ -22,8 +22,8 @@
 except ImportError:
     cparse = None
 
-from model import _Call, _Record, make_calldata_tuple
-from model import _Substitution, _Breakend, _SingleBreakend, _SV
+from .model import _Call, _Record, make_calldata_tuple
+from .model import _Substitution, _Breakend, _SingleBreakend, _SV
 
 
 # Metadata parsers/constants
@@ -468,7 +468,7 @@ def _parse_samples(self, samples, samp_fmt, site):
 
         nfields = len(samp_fmt._fields)
 
-        for name, sample in itertools.izip(self.samples, samples):
+        for name, sample in zip(self.samples, samples):
 
             # parse the data for this sample
             sampdat = [None] * nfields
@@ -548,7 +548,7 @@ def _parse_alt(self, str):
         else:
             return _Substitution(str)
 
-    def next(self):
+    def __next__(self):
         '''Return the next record in the file.'''
         line = next(self.reader)
         row = self._row_pattern.split(line.rstrip())
@@ -641,7 +641,7 @@ class Writer(object):
     """VCF Writer. On Windows Python 2, open stream with 'wb'."""
 
     # Reverse keys and values in header field count dictionary
-    counts = dict((v,k) for k,v in field_counts.iteritems())
+    counts = dict((v,k) for k,v in field_counts.items())
 
     def __init__(self, stream, template, lineterminator="\n"):
         self.writer = csv.writer(stream, delimiter="\t",
@@ -654,30 +654,30 @@ def __init__(self, stream, template, lineterminator="\n"):
         # get a maximum key).
         self.info_order = collections.defaultdict(
             lambda: len(template.infos),
-            dict(zip(template.infos.iterkeys(), itertools.count())))
+            dict(list(zip(iter(template.infos.keys()), itertools.count()))))
 
         two = '##{key}=<ID={0},Description="{1}">\n'
         four = '##{key}=<ID={0},Number={num},Type={2},Description="{3}">\n'
         _num = self._fix_field_count
-        for (key, vals) in template.metadata.iteritems():
+        for (key, vals) in template.metadata.items():
             if key in SINGULAR_METADATA:
                 vals = [vals]
             for val in vals:
                 if isinstance(val, dict):
                     values = ','.join('{0}={1}'.format(key, value)
-                                      for key, value in val.items())
+                                      for key, value in list(val.items()))
                     stream.write('##{0}=<{1}>\n'.format(key, values))
                 else:
                     stream.write('##{0}={1}\n'.format(key, val))
-        for line in template.infos.itervalues():
+        for line in template.infos.values():
             stream.write(four.format(key="INFO", *line, num=_num(line.num)))
-        for line in template.formats.itervalues():
+        for line in template.formats.values():
             stream.write(four.format(key="FORMAT", *line, num=_num(line.num)))
-        for line in template.filters.itervalues():
+        for line in template.filters.values():
             stream.write(two.format(key="FILTER", *line))
-        for line in template.alts.itervalues():
+        for line in template.alts.values():
             stream.write(two.format(key="ALT", *line))
-        for line in template.contigs.itervalues():
+        for line in template.contigs.values():
             if line.length:
                 stream.write('##contig=<ID={0},length={1}>\n'.format(*line))
             else:
diff --git a/vcf/sample_filter.py b/vcf/sample_filter.py
@@ -7,7 +7,7 @@
 import warnings
 
 
-from parser import Reader, Writer
+from .parser import Reader, Writer
 
 
 class SampleFilter(object):
@@ -81,13 +81,13 @@ def filt2idx(item):
                 # is int, check if it's an idx
                 if item < len(self.samples):
                     return item
-        filters = set(filter(lambda x: x is not None, map(filt2idx, filt_s)))
+        filters = set([x for x in map(filt2idx, filt_s) if x is not None])
         if len(filters) < len(filt_s):
             # TODO print the filters that were ignored
             warnings.warn("Invalid filters, ignoring", RuntimeWarning)
 
         if self.invert:
-            filters = set(xrange(len(self.samples))).difference(filters)
+            filters = set(range(len(self.samples))).difference(filters)
 
         # `sample_filter` setter updates `samples`
         self.parser.sample_filter = filters
diff --git a/vcf/test/prof.py b/vcf/test/prof.py
@@ -19,7 +19,7 @@ def parse_1kg():
 elif sys.argv[1] == 'time':
     n = 1
     t = timeit.timeit('parse_1kg()',  "from __main__ import parse_1kg", number=n)
-    print t/n
+    print(t/n)
 
 elif sys.argv[1] == 'stat':
     import statprof
@@ -30,4 +30,4 @@ def parse_1kg():
         statprof.stop()
         statprof.display()
 else:
-    print 'prof.py profile/time'
+    print('prof.py profile/time')
diff --git a/vcf/test/test_vcf.py b/vcf/test/test_vcf.py
@@ -1,14 +1,14 @@
-from __future__ import print_function
+
 import unittest
 try:
     unittest.skip
 except AttributeError:
     import unittest2 as unittest
 import doctest
 import os
-import commands
-import cPickle
-from StringIO import StringIO
+import subprocess
+import pickle
+from io import StringIO
 import subprocess
 import sys
 
@@ -137,7 +137,7 @@ def test_contig_idonly(self):
         """Test VCF inputs with ##contig inputs containing only IDs. produced by bcftools 1.2+
         """
         reader = vcf.Reader(fh("contig_idonly.vcf"))
-        for cid, contig in reader.contigs.items():
+        for cid, contig in list(reader.contigs.items()):
             if cid == "1":
                 assert contig.length is None
             elif cid == "2":
@@ -390,24 +390,24 @@ def test_write(self):
         reader2 = vcf.Reader(out)
 
         for l, r in zip(records, reader2):
-            self.assertEquals(l.INFO, r.INFO)
+            self.assertEqual(l.INFO, r.INFO)
 
 
 class TestBadInfoFields(unittest.TestCase):
     def test_parse(self):
         reader = vcf.Reader(fh('bad-info-character.vcf'))
         record = next(reader)
-        self.assertEquals(record.INFO['DOT_1'], None)
-        self.assertEquals(record.INFO['DOT_3'], [None, None, None])
-        self.assertEquals(record.INFO['DOT_N'], [None])
-        self.assertEquals(record.INFO['EMPTY_1'], None)
+        self.assertEqual(record.INFO['DOT_1'], None)
+        self.assertEqual(record.INFO['DOT_3'], [None, None, None])
+        self.assertEqual(record.INFO['DOT_N'], [None])
+        self.assertEqual(record.INFO['EMPTY_1'], None)
         # Perhaps EMPTY_3 should yield [None, None, None] but this is really a
         # cornercase of unspecified behaviour.
-        self.assertEquals(record.INFO['EMPTY_3'], [None])
-        self.assertEquals(record.INFO['EMPTY_N'], [None])
-        self.assertEquals(record.INFO['NOTEMPTY_1'], 1)
-        self.assertEquals(record.INFO['NOTEMPTY_3'], [1, 2, 3])
-        self.assertEquals(record.INFO['NOTEMPTY_N'], [1])
+        self.assertEqual(record.INFO['EMPTY_3'], [None])
+        self.assertEqual(record.INFO['EMPTY_N'], [None])
+        self.assertEqual(record.INFO['NOTEMPTY_1'], 1)
+        self.assertEqual(record.INFO['NOTEMPTY_3'], [1, 2, 3])
+        self.assertEqual(record.INFO['NOTEMPTY_N'], [1])
         pass
 
 
@@ -440,7 +440,7 @@ def test_write(self):
         self.assertEqual(f['Options'], '"< 4 and > 3"')
 
         for l, r in zip(records, reader2):
-            self.assertEquals(l.INFO, r.INFO)
+            self.assertEqual(l.INFO, r.INFO)
 
 
 class TestGatkOutputWriter(unittest.TestCase):
@@ -463,13 +463,13 @@ def testWrite(self):
         print (out_str)
         reader2 = vcf.Reader(out)
 
-        self.assertEquals(reader.samples, reader2.samples)
-        self.assertEquals(reader.formats, reader2.formats)
-        self.assertEquals(reader.infos, reader2.infos)
-        self.assertEquals(reader.contigs, reader2.contigs)
+        self.assertEqual(reader.samples, reader2.samples)
+        self.assertEqual(reader.formats, reader2.formats)
+        self.assertEqual(reader.infos, reader2.infos)
+        self.assertEqual(reader.contigs, reader2.contigs)
 
         for l, r in zip(records, reader2):
-            self.assertEquals(l.samples, r.samples)
+            self.assertEqual(l.samples, r.samples)
 
             # test for call data equality, since equality on the sample calls
             # may not always mean their data are all equal
@@ -493,12 +493,12 @@ def testWrite(self):
         print (out.getvalue())
         reader2 = vcf.Reader(out)
 
-        self.assertEquals(reader.samples, reader2.samples)
-        self.assertEquals(reader.formats, reader2.formats)
-        self.assertEquals(reader.infos, reader2.infos)
+        self.assertEqual(reader.samples, reader2.samples)
+        self.assertEqual(reader.formats, reader2.formats)
+        self.assertEqual(reader.infos, reader2.infos)
 
         for l, r in zip(records, reader2):
-            self.assertEquals(l.samples, r.samples)
+            self.assertEqual(l.samples, r.samples)
 
             # test for call data equality, since equality on the sample calls
             # may not always mean their data are all equal
@@ -522,7 +522,7 @@ def testWrite(self):
         out_str = out.getvalue()
         for line in out_str.split("\n"):
             if line.startswith("##PEDIGREE"):
-                self.assertEquals(line, '##PEDIGREE=<Derived="Tumor",Original="Germline">')
+                self.assertEqual(line, '##PEDIGREE=<Derived="Tumor",Original="Germline">')
             if line.startswith("##SAMPLE"):
                 assert line.startswith('##SAMPLE=<'), "Found dictionary in meta line: {0}".format(line)
 
@@ -955,7 +955,7 @@ def test_info_multiple_values(self):
     def test_pickle(self):
         reader = vcf.Reader(fh('example-4.0.vcf'))
         for var in reader:
-            self.assertEqual(cPickle.loads(cPickle.dumps(var)), var)
+            self.assertEqual(pickle.loads(pickle.dumps(var)), var)
 
 
     def assert_has_expected_coordinates(
@@ -1498,7 +1498,7 @@ class TestFilter(unittest.TestCase):
     @unittest.skip("test currently broken")
     def testApplyFilter(self):
         # FIXME: broken with distribute
-        s, out = commands.getstatusoutput('python scripts/vcf_filter.py --site-quality 30 test/example-4.0.vcf sq')
+        s, out = subprocess.getstatusoutput('python scripts/vcf_filter.py --site-quality 30 test/example-4.0.vcf sq')
         #print(out)
         self.assertEqual(s, 0)
         buf = StringIO()
@@ -1528,7 +1528,7 @@ def testApplyFilter(self):
     @unittest.skip("test currently broken")
     def testApplyMultipleFilters(self):
         # FIXME: broken with distribute
-        s, out = commands.getstatusoutput('python scripts/vcf_filter.py --site-quality 30 '
+        s, out = subprocess.getstatusoutput('python scripts/vcf_filter.py --site-quality 30 '
         '--genotype-quality 50 test/example-4.0.vcf sq mgq')
         self.assertEqual(s, 0)
         #print(out)
@@ -1599,7 +1599,7 @@ def test_walk(self):
                 assert recs[1] is not None
 
         # test files with many chromosomes, set 'vcf_record_sort_key' to define chromosome order
-        chr_order = map(str, range(1, 30)) + ['X', 'Y', 'M']
+        chr_order = list(map(str, list(range(1, 30)))) + ['X', 'Y', 'M']
         get_key = lambda r: (chr_order.index(r.CHROM.replace('chr','')), r.POS)
         reader1 = vcf.Reader(fh('issue-140-file1.vcf'))
         reader2 = vcf.Reader(fh('issue-140-file2.vcf'))
diff --git a/vcf/utils.py b/vcf/utils.py
@@ -37,19 +37,19 @@ def walk_together(*readers, **kwargs):
         next_idx_to_k = dict(
             (i, get_key(r)) for i, r in enumerate(nexts) if r is not None)
         keys_with_prev_contig = [
-            k for k in next_idx_to_k.values() if k[0] == min_k[0]]
+            k for k in list(next_idx_to_k.values()) if k[0] == min_k[0]]
 
         if any(keys_with_prev_contig):
             min_k = min(keys_with_prev_contig)   # finish previous contig
         else:
             min_k = min(next_idx_to_k.values())   # move on to next contig
 
-        min_k_idxs = set([i for i, k in next_idx_to_k.items() if k == min_k])
+        min_k_idxs = set([i for i, k in list(next_idx_to_k.items()) if k == min_k])
         yield [nexts[i] if i in min_k_idxs else None for i in range(len(nexts))]
 
         for i in min_k_idxs:
             try:
-                nexts[i] = readers[i].next()
+                nexts[i] = next(readers[i])
             except StopIteration:
                 nexts[i] = None