Skip to content

Commit 352f051

Browse files
authored
Merge pull request #2077 from markotoplak/wildcard-names
File formats: wildcard matching
2 parents 59c0ded + 3f9ea59 commit 352f051

File tree

2 files changed

+73
-4
lines changed

2 files changed

+73
-4
lines changed

Orange/data/io.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
from tempfile import NamedTemporaryFile
1818
from urllib.parse import urlparse, urlsplit, urlunsplit, unquote as urlunquote
1919
from urllib.request import urlopen, Request
20+
from fnmatch import fnmatch
21+
from glob import glob
2022

2123
import bottleneck as bn
2224
import numpy as np
@@ -362,7 +364,7 @@ def get_reader(cls, filename):
362364
# Skip ambiguous, invalid compression-only extensions added on OSX
363365
if ext in Compression.all:
364366
continue
365-
if filename.endswith(ext):
367+
if fnmatch(path.basename(filename), '*' + ext):
366368
return reader(filename)
367369

368370
raise IOError('No readers for file "{}"'.format(filename))
@@ -421,10 +423,12 @@ def locate(cls, filename, search_dirs=('.',)):
421423
if path.exists(absolute_filename):
422424
break
423425
for ext in cls.readers:
424-
if filename.endswith(ext):
426+
if fnmatch(path.basename(filename), '*' + ext):
425427
break
426-
if path.exists(absolute_filename + ext):
427-
absolute_filename += ext
428+
# glob uses fnmatch internally
429+
matching_files = glob(absolute_filename + ext)
430+
if matching_files:
431+
absolute_filename = matching_files[0]
428432
break
429433
if path.exists(absolute_filename):
430434
break

Orange/tests/test_io.py

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
# Test methods with long descriptive names can omit docstrings
2+
# pylint: disable=missing-docstring
3+
4+
import unittest
5+
import os
6+
import tempfile
7+
import shutil
8+
9+
from Orange.data.io import FileFormat, TabReader, CSVReader, PickleReader
10+
from Orange.data.table import get_sample_datasets_dir
11+
12+
class WildcardReader(FileFormat):
    # Dummy FileFormat subclass for the extension-matching tests below:
    # it declares one literal extension and one pattern with a digit class.
    DESCRIPTION = "Dummy reader for testing extensions"
    EXTENSIONS = ('.wild', '.wild[0-9]')

    def read(self):
        # The tests only check reader selection; nothing is actually read.
        return None
18+
19+
20+
class TestChooseReader(unittest.TestCase):
    # Checks which reader class FileFormat.get_reader returns for a file name.

    def test_usual_extensions(self):
        expected_readers = (
            ("t.tab", TabReader),
            ("t.csv", CSVReader),
            ("t.pkl", PickleReader),
        )
        for filename, reader_class in expected_readers:
            chosen = FileFormat.get_reader(filename)
            self.assertIsInstance(chosen, reader_class)
        # A file name with no matching reader must raise.
        self.assertRaises(OSError, FileFormat.get_reader,
                          "test.undefined_extension")

    def test_wildcard_extension(self):
        for filename in ("t.wild", "t.wild2"):
            chosen = FileFormat.get_reader(filename)
            self.assertIsInstance(chosen, WildcardReader)
        # An extra character after the digit is expected to match no reader.
        self.assertRaises(OSError, FileFormat.get_reader, "t.wild2a")
36+
37+
38+
class TestLocate(unittest.TestCase):
    # Exercises FileFormat.locate: resolving a file name inside the given
    # search directories, with and without the extension spelled out.

    def test_locate_sample_datasets(self):
        # The tests directory is not expected to contain iris.tab.
        with self.assertRaises(OSError):
            FileFormat.locate("iris.tab",
                              search_dirs=[os.path.dirname(__file__)])
        iris = FileFormat.locate("iris.tab",
                                 search_dirs=[get_sample_datasets_dir()])
        self.assertEqual(os.path.basename(iris), "iris.tab")
        # test extension adding
        iris = FileFormat.locate("iris",
                                 search_dirs=[get_sample_datasets_dir()])
        self.assertEqual(os.path.basename(iris), "iris.tab")

    def test_locate_wildcard_extension(self):
        tempdir = tempfile.mkdtemp()
        # Register removal immediately so the directory is cleaned up even
        # when one of the assertions below fails or locate() raises.
        self.addCleanup(shutil.rmtree, tempdir)

        # Nothing has been created yet, so the lookup must fail.
        with self.assertRaises(OSError):
            FileFormat.locate("t.wild9", search_dirs=[tempdir])

        fn = os.path.join(tempdir, "t.wild8")
        with open(fn, "wt") as f:
            f.write("\n")

        located = FileFormat.locate("t.wild8", search_dirs=[tempdir])
        self.assertEqual(located, fn)
        # test extension adding
        located = FileFormat.locate("t", search_dirs=[tempdir])
        self.assertEqual(located, fn)

0 commit comments

Comments
 (0)