Skip to content

Commit ee6b51b

Browse files
committed
Update setup.py to use .tsv files for downloading ANI databases
1 parent 5394a0f commit ee6b51b

File tree

1 file changed

+12
-8
lines changed

1 file changed

+12
-8
lines changed

setup.py

+12-8
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
#!/usr/bin/env python3
22

33
import collections
4+
import csv
45
import datetime
56
import gzip
67
import os
@@ -29,8 +30,8 @@ def run(self):
2930

3031
# download genome files for each databases
3132
for subset in ("species", "subspecies", "geneflow", "typestrains"):
32-
list = os.path.join(btyper3_path, "seq_ani_db", subset, "{}.txt".format(subset))
33-
self.download_genomes(btyper3_path, list, subset)
33+
db = os.path.join(btyper3_path, "seq_ani_db", subset, "{}.tsv".format(subset))
34+
self.download_genomes(btyper3_path, db, subset)
3435

3536
def download(self, url, dest, append=False, decompress=False):
3637
print("downloading {!r} to {!r}".format(url, dest))
@@ -66,13 +67,16 @@ def download_pubmlst(self, btyper3_path):
6667

6768
def download_genomes(self, btyper3_path, genome_list, ani_directory):
6869
with open(genome_list) as genomes:
69-
for line in genomes:
70-
if line.startswith("#"):
71-
continue
72-
gname, gpath = map(str.strip, line.split()[:2])
73-
gfile = os.path.join(btyper3_path, "seq_ani_db", ani_directory, gname)
70+
reader = csv.reader(genomes, dialect="excel-tab")
71+
72+
header = next(reader)
73+
id_col = header.index("id")
74+
url_col = header.index("url")
75+
76+
for row in reader:
77+
gfile = os.path.join(btyper3_path, "seq_ani_db", ani_directory, row[id_col])
7478
if not os.path.isfile(gfile):
75-
self.download(url=gpath, dest=gfile)
79+
self.download(url=row[url_col], dest=gfile)
7680

7781

7882
setuptools.setup(cmdclass={"build_py": build_py})

0 commit comments

Comments
 (0)