Skip to content

Commit 5d7c550

Browse files
committed
Edit Domain: enable transformation to time variable with format selection
1 parent 12bb88f commit 5d7c550

File tree

2 files changed

+117
-93
lines changed

2 files changed

+117
-93
lines changed

Orange/widgets/data/oweditdomain.py

Lines changed: 66 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -190,8 +190,12 @@ class Unlink(_DataType, namedtuple("Unlink", [])):
190190
"""Unlink variable from its source, that is, remove compute_value"""
191191

192192

193-
Transform = Union[Rename, CategoriesMapping, Annotate, Unlink]
194-
TransformTypes = (Rename, CategoriesMapping, Annotate, Unlink)
193+
class StrpTime(
    _DataType,
    namedtuple(
        "StrpTime",
        ["label", "formats", "have_date", "have_time"],
    ),
):
    """
    Parse the variable's values as time using a selected format.

    Fields:

    - ``label``: text identifying the format; the time editor looks it up
      among the item texts of its format combo box.
    - ``formats``: the format strings that are tried in order when the
      column is re-parsed, or ``None`` to let the parser detect the format.
    - ``have_date``, ``have_time``: forwarded to the resulting
      ``TimeVariable``; when ``have_date`` is false the parsed values are
      converted relative to midnight instead of the epoch.
    """
195+
196+
197+
# Every kind of transform that can be recorded for a variable, once as a
# tuple of classes and once as a typing alias.  The two must list the same
# classes.
TransformTypes = (Rename, CategoriesMapping, Annotate, Unlink, StrpTime)
Transform = Union[Rename, CategoriesMapping, Annotate, Unlink, StrpTime]
195199

196200
CategoricalTransformTypes = (CategoriesMapping, Unlink)
197201

@@ -1519,8 +1523,37 @@ class ContinuousVariableEditor(VariableEditor):
15191523

15201524

15211525
class TimeVariableEditor(VariableEditor):
    """
    Variable editor with an additional combo box for the time format.

    Every combo box item carries a ``StrpTime`` transform as its data.  The
    selected one is prepended to the transforms returned by ``get_data``,
    unless the variable being edited already is a time variable, in which
    case the combo box is disabled and no ``StrpTime`` is added.
    """

    def __init__(self, parent=None, **kwargs):
        super().__init__(parent, **kwargs)
        # NOTE(review): presumably the form layout created by VariableEditor;
        # ``insertRow`` below relies on that - confirm against the base class.
        form = self.layout().itemAt(0)

        self.format_cb = QComboBox()
        # (formats, have_date, have_time); ``None`` formats means that the
        # format is detected when the column is parsed.
        choices = [("Detect automatically", (None, 1, 1))]
        choices.extend(Orange.data.TimeVariable.ADDITIONAL_FORMATS.items())
        for label, data in choices:
            self.format_cb.addItem(label, StrpTime(label, *data))
        self.format_cb.currentIndexChanged.connect(self.variable_changed)
        form.insertRow(2, "Format:", self.format_cb)

    def _source_is_time(self):
        """Return True if the parent widget is editing a time variable."""
        parent = self.parent()
        # A parent without a ``var`` attribute is treated as "not time"
        # instead of raising AttributeError.
        return parent is not None and isinstance(
            getattr(parent, "var", None), Time
        )

    def set_data(self, var, transform=()):
        """Show `var` and select the format stored in a StrpTime transform."""
        super().set_data(var, transform)
        if self._source_is_time():
            # when transforming from time to time disable format selection
            self.format_cb.setEnabled(False)
            return

        # select the format from StrpTime transform
        for tr in transform:
            if isinstance(tr, StrpTime):
                index = self.format_cb.findText(tr.label)
                # findText returns -1 for an unknown label; applying it
                # would clear the selection and make get_data return None
                # in place of a StrpTime, so keep the current selection.
                if index >= 0:
                    self.format_cb.setCurrentIndex(index)
        self.format_cb.setEnabled(True)

    def get_data(self):
        """Return the variable and its transforms, StrpTime first."""
        var, tr = super().get_data()
        # do not add StrpTime when transforming from time to time
        if var is not None and not self._source_is_time():
            tr.insert(0, self.format_cb.currentData())
        return var, tr
15241557

15251558

15261559
def variable_icon(var):
@@ -2581,14 +2614,17 @@ def apply_transform_time(var, trs):
25812614
def apply_transform_string(var, trs):
    # type: (Orange.data.StringVariable, List[Transform]) -> Orange.data.Variable
    """
    Build a new variable from the string variable `var` and transforms `trs`.

    ``Rename`` sets the name and ``Annotate`` replaces the annotations.  A
    ``StrpTime`` turns the result into a ``TimeVariable`` whose values are
    re-parsed from `var`; without it the result is a ``StringVariable`` that
    passes the values of `var` through.  When a kind of transform occurs
    more than once, the last occurrence is used.
    """
    new_name = var.name
    new_annotations = var.attributes
    strp = None
    for tr in trs:
        if isinstance(tr, Rename):
            new_name = tr.name
        elif isinstance(tr, Annotate):
            new_annotations = _parse_attributes(tr.annotations)
        elif isinstance(tr, StrpTime):
            strp = tr

    if strp is None:
        variable = Orange.data.StringVariable(
            name=new_name, compute_value=Identity(var)
        )
    else:
        variable = Orange.data.TimeVariable(
            have_date=strp.have_date,
            have_time=strp.have_time,
            name=new_name,
            compute_value=ReparseTimeTransform(var, tr=strp),
        )
    variable.attributes.update(new_annotations)
    return variable
25942630

@@ -2649,21 +2685,6 @@ def mapper(arr, out=None, dtype=dtype, **kwargs):
26492685
return mapper
26502686

26512687

2652-
def time_parse(values: Sequence[str], name="__"):
2653-
tvar = Orange.data.TimeVariable(name)
2654-
parse_time = ftry(tvar.parse, ValueError, np.nan)
2655-
_values = [parse_time(v) for v in values]
2656-
if np.all(np.isnan(_values)):
2657-
# try parsing it with pandas (like in transform)
2658-
dti = pd.to_datetime(values, errors="coerce")
2659-
_values = datetime_to_epoch(dti)
2660-
date_only = getattr(dti, "_is_dates_only", False)
2661-
if np.all(dti != pd.NaT):
2662-
tvar.have_date = True
2663-
tvar.have_time = not date_only
2664-
return tvar, _values
2665-
2666-
26672688
as_string = np.frompyfunc(str, 1, 1)
26682689
parse_float = ftry(float, ValueError, float("nan"))
26692690

@@ -2710,24 +2731,16 @@ def apply_reinterpret_d(var, tr, data):
27102731
# type: (Orange.data.DiscreteVariable, ReinterpretTransform, ndarray) -> Orange.data.Variable
27112732
if isinstance(tr, AsCategorical):
27122733
return var
2713-
elif isinstance(tr, AsString):
2734+
elif isinstance(tr, (AsString, AsTime)):
2735+
# TimeVar will be interpreted by StrpTime later
27142736
f = Lookup(var, np.array(var.values, dtype=object), unknown="")
2715-
rvar = Orange.data.StringVariable(
2716-
name=var.name, compute_value=f
2717-
)
2737+
rvar = Orange.data.StringVariable(name=var.name, compute_value=f)
27182738
elif isinstance(tr, AsContinuous):
27192739
f = Lookup(var, np.array(list(map(parse_float, var.values))),
27202740
unknown=np.nan)
27212741
rvar = Orange.data.ContinuousVariable(
27222742
name=var.name, compute_value=f, sparse=var.sparse
27232743
)
2724-
elif isinstance(tr, AsTime):
2725-
_tvar, values = time_parse(var.values)
2726-
f = Lookup(var, np.array(values), unknown=np.nan)
2727-
rvar = Orange.data.TimeVariable(
2728-
name=var.name, have_date=_tvar.have_date,
2729-
have_time=_tvar.have_time, compute_value=f,
2730-
)
27312744
else:
27322745
assert False
27332746
return copy_attributes(rvar, var)
@@ -2753,14 +2766,11 @@ def apply_reinterpret_c(var, tr, data: MArray):
27532766
elif isinstance(tr, AsContinuous):
27542767
return var
27552768
elif isinstance(tr, AsString):
2769+
# TimeVar will be interpreted by StrpTime later
27562770
tstr = ToStringTransform(var)
2757-
rvar = Orange.data.StringVariable(
2758-
name=var.name, compute_value=tstr
2759-
)
2771+
rvar = Orange.data.StringVariable(name=var.name, compute_value=tstr)
27602772
elif isinstance(tr, AsTime):
2761-
rvar = Orange.data.TimeVariable(
2762-
name=var.name, compute_value=Identity(var)
2763-
)
2773+
rvar = Orange.data.TimeVariable(name=var.name, compute_value=Identity(var))
27642774
else:
27652775
assert False
27662776
return copy_attributes(rvar, var)
@@ -2783,14 +2793,9 @@ def apply_reinterpret_s(var: Orange.data.StringVariable, tr, data: MArray):
27832793
rvar = Orange.data.ContinuousVariable(
27842794
var.name, compute_value=ToContinuousTransform(var)
27852795
)
2786-
elif isinstance(tr, AsString):
2796+
elif isinstance(tr, (AsString, AsTime)):
2797+
# TimeVar will be interpreted by StrpTime later
27872798
return var
2788-
elif isinstance(tr, AsTime):
2789-
tvar, _ = time_parse(np.unique(data.data[~data.mask]))
2790-
rvar = Orange.data.TimeVariable(
2791-
name=var.name, have_date=tvar.have_date, have_time=tvar.have_time,
2792-
compute_value=ReparseTimeTransform(var)
2793-
)
27942799
else:
27952800
assert False
27962801
return copy_attributes(rvar, var)
@@ -2822,6 +2827,7 @@ def apply_reinterpret_t(var: Orange.data.TimeVariable, tr, data):
28222827
else:
28232828
assert False
28242829
return copy_attributes(rvar, var)
2830+
# TODO: disable the format dropdown when the variable is already time
28252831

28262832

28272833
def orange_isna(variable: Orange.data.Variable, data: ndarray) -> ndarray:
@@ -2867,23 +2873,28 @@ def transform(self, c):
28672873
raise TypeError
28682874

28692875

2870-
def datetime_to_epoch(dti: pd.DatetimeIndex, only_time: bool = False) -> np.ndarray:
    """
    Convert datetimes to seconds, returned as a float array.

    If `only_time` is false (default), the result is the number of seconds
    since 1970-01-01.  If it is true, the date part is ignored and the
    result is the number of seconds since midnight of each value's own day.
    ``NaT`` entries become ``nan``.
    """
    if only_time:
        origin = dti.normalize()
    else:
        # A timezone-aware index cannot be subtracted from a naive timestamp,
        # so anchor the epoch in UTC whenever the index carries a timezone.
        origin = pd.Timestamp(
            "1970-01-01", tz="UTC" if dti.tz is not None else None
        )
    return ((dti - origin) / pd.Timedelta("1s")).values
28772880

28782881

28792882
class ReparseTimeTransform(Transformation):
    """
    Re-parse the column's values as datetime using the formats of `tr`.

    `tr` supplies ``formats`` (the format strings to try in order, or
    ``None`` to let pandas detect the format) and ``have_date`` (when false,
    the parsed values are converted relative to midnight instead of the
    epoch).
    """

    def __init__(self, variable, tr):
        super().__init__(variable)
        self.tr = tr

    def transform(self, c):
        """
        Return the column `c` parsed as time, in seconds.

        The first format for which at least one value parses is used;
        values that do not parse with it become ``nan``.  If no format
        parses any value, a column of ``nan`` is returned.
        """
        # ``tr.formats`` is None when automatic format detection is selected
        formats = self.tr.formats if self.tr.formats is not None else [None]
        for f in formats:
            d = pd.to_datetime(c, errors="coerce", format=f)
            if pd.notnull(d).any():
                return datetime_to_epoch(d, only_time=not self.tr.have_date)
        # Keep the shape of the input instead of returning a bare scalar.
        return np.full(np.shape(c), np.nan)

    def __eq__(self, other):
        # Transforms of the same variable that parse with different formats
        # produce different values and must not compare equal.
        return (
            type(self) is type(other)
            and self.variable == other.variable
            and self.tr == other.tr
        )

    def __hash__(self):
        # ``tr`` is left out on purpose: equal transforms still hash equally,
        # and hashing cannot fail if ``tr.formats`` is an unhashable list.
        return hash((type(self), self.variable))
28872898

28882899

28892900
class LookupMappingTransform(Transformation):

Orange/widgets/data/tests/test_oweditdomain.py

Lines changed: 51 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,13 @@
22
# pylint: disable=all
33
import pickle
44
import unittest
5-
from itertools import product
5+
from functools import partial
6+
from itertools import product, chain
67
from unittest import TestCase
78
from unittest.mock import Mock, patch
89

910
import numpy as np
1011
from numpy.testing import assert_array_equal
11-
import pandas as pd
1212

1313
from AnyQt.QtCore import QItemSelectionModel, Qt, QItemSelection, QPoint
1414
from AnyQt.QtGui import QPalette, QColor, QHelpEvent
@@ -33,8 +33,8 @@
3333
table_column_data, ReinterpretVariableEditor, CategoricalVector,
3434
VariableEditDelegate, TransformRole,
3535
RealVector, TimeVector, StringVector, make_dict_mapper, DictMissingConst,
36-
LookupMappingTransform, as_float_or_nan, column_str_repr, time_parse,
37-
GroupItemsDialog, VariableListModel
36+
LookupMappingTransform, as_float_or_nan, column_str_repr,
37+
GroupItemsDialog, VariableListModel, StrpTime
3838
)
3939
from Orange.widgets.data.owcolor import OWColor, ColorRole
4040
from Orange.widgets.tests.base import WidgetTest, GuiTest
@@ -589,8 +589,9 @@ def test_time_editor(self):
589589
),
590590
]
591591
ReinterpretTransforms = {
592-
Categorical: AsCategorical, Real: AsContinuous, Time: AsTime,
593-
String: AsString
592+
Categorical: [AsCategorical], Real: [AsContinuous],
593+
Time: [AsTime, partial(StrpTime, 'Detect automatically', None, 1, 1)],
594+
String: [AsString]
594595
}
595596

596597
def test_reinterpret_editor(self):
@@ -603,13 +604,13 @@ def test_reinterpret_editor(self):
603604
self.assertEqual(w.get_data(), (data.vtype, [Rename("Z")]))
604605

605606
for vec, tr in product(self.DataVectors, self.ReinterpretTransforms.values()):
606-
w.set_data(vec, [tr()])
607+
w.set_data(vec, [t() for t in tr])
607608
v, tr_ = w.get_data()
608609
self.assertEqual(v, vec.vtype)
609610
if not tr_:
610611
self.assertEqual(tr, self.ReinterpretTransforms[type(v)])
611612
else:
612-
self.assertEqual(tr_, [tr()])
613+
self.assertListEqual(tr_, [t() for t in tr])
613614

614615
def test_reinterpret_editor_simulate(self):
615616
w = ReinterpretVariableEditor()
@@ -619,7 +620,9 @@ def cb():
619620
var, tr = w.get_data()
620621
type_ = tc.currentData()
621622
if type_ is not type(var):
622-
self.assertEqual(tr, [self.ReinterpretTransforms[type_](), Rename("Z")])
623+
self.assertEqual(
624+
tr, [t() for t in self.ReinterpretTransforms[type_]] + [Rename("Z")]
625+
)
623626
else:
624627
self.assertEqual(tr, [Rename("Z")])
625628

@@ -912,34 +915,58 @@ def test_as_continuous(self):
912915
)
913916

914917
def test_as_time(self):
    # Only the kind of the source variable (string or discrete) is varied
    # here; the correctness of the individual time formats is already
    # tested in the TimeVariable module.
    to_epoch = TimeVariable("_").parse_exact_iso
    columns = (
        ["07.02.2022", "18.04.2021"],  # date only
        ["07.02.2022 01:02:03", "18.04.2021 01:02:03"],  # datetime
        ["010203", "010203"],  # time
        ["02-07", "04-18"],
    )
    labels = ["25.11.2021", "25.11.2021 00:00:00", "000000", "11-25"]
    expected = [
        [to_epoch("2022-02-07"), to_epoch("2021-04-18")],
        [to_epoch("2022-02-07 01:02:03"), to_epoch("2021-04-18 01:02:03")],
        [to_epoch("01:02:03"), to_epoch("01:02:03")],
        [to_epoch("1900-02-07"), to_epoch("1900-04-18")],
    ]

    # The same columns are stored twice: as strings, and as discrete
    # variables whose values are the strings (the data then holds indices).
    string_vars = [StringVariable(f"s{i}") for i in range(len(columns))]
    discrete_vars = [
        DiscreteVariable(f"d{i}", values=col) for i, col in enumerate(columns)
    ]
    domain = Domain([], metas=string_vars + discrete_vars)
    rows = list(columns) + [list(range(len(col))) for col in columns]
    table = Table(
        domain,
        np.empty((len(columns[0]), 0)),
        metas=np.array(rows).transpose(),
    )

    as_time = AsTime()
    transformed = []
    for var, label in zip(domain.metas, labels * 2):
        strp = StrpTime(label, *TimeVariable.ADDITIONAL_FORMATS[label])
        reinterpreted = apply_reinterpret(
            var, as_time, table_column_data(table, var)
        )
        transformed.append(apply_transform_var(reinterpreted, [strp]))

    ttable = table.transform(Domain([], metas=transformed))
    assert_array_equal(
        ttable.metas,
        np.array(expected * 2, dtype=float).transpose(),
    )
934954

935955
def test_reinterpret_string(self):
936956
table = self.data_str
937957
domain = table.domain
938958
tvars = []
939959
for v in domain.metas:
940-
for i, tr in enumerate([AsContinuous(), AsCategorical(), AsTime(), AsString()]):
941-
tr = apply_reinterpret(v, tr, table_column_data(table, v)).renamed(f'{v.name}_{i}')
942-
tvars.append(tr)
960+
for i, tr in enumerate(
961+
[AsContinuous(), AsCategorical(), AsTime(), AsString()]
962+
):
963+
vtr = apply_reinterpret(v, tr, table_column_data(table, v)).renamed(
964+
f"{v.name}_{i}"
965+
)
966+
if isinstance(tr, AsTime):
967+
strp = StrpTime("Detect automatically", None, 1, 1)
968+
vtr = apply_transform_var(vtr, [strp])
969+
tvars.append(vtr)
943970
tdomain = Domain([], metas=tvars)
944971
ttable = table.transform(tdomain)
945972
assert_array_nanequal(
@@ -1039,19 +1066,6 @@ def test_column_str_repr(self):
10391066
d = column_str_repr(v, np.array([0., np.nan, 1.0]))
10401067
assert_array_equal(d, ["00:00:00", "?", "00:00:01"])
10411068

1042-
def test_time_parse(self):
1043-
"""parsing additional datetimes by pandas"""
1044-
date = ["1/22/20", "1/23/20", "1/24/20"]
1045-
# we use privet method, check if still exists
1046-
assert hasattr(pd.DatetimeIndex, '_is_dates_only')
1047-
1048-
tval, values = time_parse(date)
1049-
1050-
self.assertTrue(tval.have_date)
1051-
self.assertFalse(tval.have_time)
1052-
self.assertListEqual(list(values),
1053-
[1579651200.0, 1579737600.0, 1579824000.0])
1054-
10551069

10561070
class TestLookupMappingTransform(TestCase):
10571071
def setUp(self) -> None:
@@ -1220,4 +1234,3 @@ def _test_correctness():
12201234

12211235
if __name__ == '__main__':
12221236
unittest.main()
1223-

0 commit comments

Comments
 (0)