Skip to content

Commit 39dad0f

Browse files
committed
Feature Constructor: Evaluate categorical variables to strings
1 parent 0a34127 commit 39dad0f

File tree

3 files changed

+86
-10
lines changed

3 files changed

+86
-10
lines changed

Orange/widgets/data/owfeatureconstructor.py

Lines changed: 32 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,13 @@ def selected_row(view):
101101

102102

103103
class FeatureEditor(QFrame):
104+
ExpressionTooltip = """
105+
Use variable names as values in expression.
106+
Categorical features are passed as strings
107+
(note the change in behaviour from Orange 3.30).
108+
109+
""".lstrip()
110+
104111
FUNCTIONS = dict(chain([(key, val) for key, val in math.__dict__.items()
105112
if not key.startswith("_")],
106113
[(key, val) for key, val in builtins.__dict__.items()
@@ -231,7 +238,8 @@ def insert_into_expression(self, what):
231238

232239

233240
class ContinuousFeatureEditor(FeatureEditor):
234-
ExpressionTooltip = "A numeric expression"
241+
ExpressionTooltip = "A numeric expression\n\n" \
242+
+ FeatureEditor.ExpressionTooltip
235243

236244
def editorData(self):
237245
return ContinuousDescriptor(
@@ -242,7 +250,7 @@ def editorData(self):
242250

243251

244252
class DateTimeFeatureEditor(FeatureEditor):
245-
ExpressionTooltip = \
253+
ExpressionTooltip = FeatureEditor.ExpressionTooltip + \
246254
"Result must be a string in ISO-8601 format " \
247255
"(e.g. 2019-07-30T15:37:27 or a part thereof),\n" \
248256
"or a number of seconds since Jan 1, 1970."
@@ -255,7 +263,7 @@ def editorData(self):
255263

256264

257265
class DiscreteFeatureEditor(FeatureEditor):
258-
ExpressionTooltip = \
266+
ExpressionTooltip = FeatureEditor.ExpressionTooltip + \
259267
"Result must be a string, if values are not explicitly given\n" \
260268
"or a zero-based integer indices into a list of values given below."
261269

@@ -292,7 +300,8 @@ def editorData(self):
292300

293301

294302
class StringFeatureEditor(FeatureEditor):
295-
ExpressionTooltip = "A string expression"
303+
ExpressionTooltip = "A string expression\n\n" \
304+
+ FeatureEditor.ExpressionTooltip
296305

297306
def editorData(self):
298307
return StringDescriptor(
@@ -551,6 +560,19 @@ def reserved_names(self, idx_=None):
551560
if idx != idx_]
552561
return set(varnames)
553562

563+
@staticmethod
def purged_values(descriptors, domain):
    """
    Return copies of `descriptors` with `.value` removed after variable names.

    Every occurrence of ``<name>.value`` in a descriptor's expression, where
    ``<name>`` is the name of a variable or meta in `domain`, is replaced by
    ``<name>``. References to names that are not in the domain, and
    attributes that merely start with ``value`` (e.g. ``.values()``), are
    left untouched.

    Args:
        descriptors: iterable of descriptor named tuples with an
            `expression` field.
        domain: object exposing `variables` and `metas`, whose elements
            have a `name` attribute.

    Returns:
        A new list of descriptors; the input descriptors are not modified.
    """
    names = [var.name for var in chain(domain.variables, domain.metas)]
    if not names:
        # An empty alternation would match the empty string and strip
        # `.value` from unrelated sub-expressions such as `foo().value`.
        return list(descriptors)

    # Names are escaped so that regex metacharacters in a variable name can
    # neither break compilation nor match unrelated identifiers. Boundaries
    # are zero-width assertions so that adjacent references separated by a
    # single character (`ana.value+berta.value`) are both rewritten.
    expr = re.compile(
        r"(?<!\w)("
        + "|".join(re.escape(name) for name in names)
        + r")\.value(?!\w)")
    return [
        descriptor._replace(
            expression=expr.sub(
                lambda mo: mo.group(1),
                descriptor.expression))
        for descriptor in descriptors]
575+
554576
@Inputs.data
555577
@check_sql_input
556578
def setData(self, data=None):
@@ -572,6 +594,8 @@ def setData(self, data=None):
572594
selmodel = self.featureview.selectionModel()
573595
selmodel.selectionChanged.disconnect(
574596
self._on_selectedVariableChanged)
597+
self.descriptors = self.purged_values(
598+
self.descriptors, data.domain)
575599

576600
self.featuremodel[:] = list(self.descriptors)
577601
self.setCurrentIndex(self.currentIndex)
@@ -1092,9 +1116,10 @@ def __call__(self, instance, *_):
10921116
return [self(inst) for inst in instance]
10931117
else:
10941118
try:
1095-
args = [str(instance[var])
1096-
if instance.domain[var].is_string else instance[var]
1097-
for _, var in self.args]
1119+
args = [str(instance[var]) if var.is_string
1120+
else var.values[int(instance[var])] if var.is_discrete
1121+
else instance[var]
1122+
for _, var in self.args]
10981123
y = self.func(*args)
10991124
# user's expression can contain arbitrary errors
11001125
# this also covers missing attributes

Orange/widgets/data/tests/test_owfeatureconstructor.py

Lines changed: 48 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88

99
import numpy as np
1010

11+
from orangewidget.settings import Context
12+
1113
from Orange.data import (Table, Domain, StringVariable,
1214
ContinuousVariable, DiscreteVariable, TimeVariable)
1315
from Orange.widgets.tests.base import WidgetTest
@@ -366,14 +368,59 @@ def test_discrete_no_values(self):
366368

367369
discreteFeatureEditor.valuesedit.setText("A")
368370
discreteFeatureEditor.nameedit.setText("D1")
369-
discreteFeatureEditor.expressionedit.setText("iris")
371+
discreteFeatureEditor.expressionedit.setText("ord(iris[0])")
370372
self.widget.addFeature(
371373
discreteFeatureEditor.editorData()
372374
)
373375
self.assertFalse(self.widget.Error.more_values_needed.is_shown())
374376
self.widget.apply()
375377
self.assertTrue(self.widget.Error.more_values_needed.is_shown())
376378

379+
def test_purge_values(self):
    """`purged_values` strips `.value` only after names found in the domain."""
    ana, berta, cilka = (
        ContinuousVariable(name) for name in ("ana", "berta", "cilka"))
    # One attribute, one class variable and one meta.
    domain = Domain([ana], berta, [cilka])

    # Pairs of (stored expression, expected purged expression).
    cases = (
        ("ana.value + berta.value + cilka.value",
         "ana + berta + cilka"),
        # `dani` is not in the domain, so its `.value` stays.
        ("ana.value + dani.value + cilka.value",
         "ana + dani.value + cilka"),
        ("sin(ana.value) + berta.value**2+cilka.value",
         "sin(ana) + berta**2+cilka"),
        # `.values()` on an unrelated object must not be touched.
        ("sin(ana.value) + list(dict(a=2).values())",
         "sin(ana) + list(dict(a=2).values())"),
    )
    for expression, expected in cases:
        descriptor = ContinuousDescriptor("y", expression, 1)
        purged = self.widget.purged_values([descriptor], domain)
        self.assertEqual(purged[0].expression, expected)
402+
403+
def test_purge_values_in_context(self):
    """Expressions restored from a stored context are purged on new data."""
    names = ("ana", "berta", "cilka")
    variables = [
        DiscreteVariable(name, values=tuple("012")) for name in names]
    data = Table.from_domain(Domain(variables), 5)

    # A saved context whose expression still uses the `.value` form.
    stored_descriptor = ContinuousDescriptor(
        "y", "int(ana.value) + int(berta.value)", 1)
    context = Context(
        attributes={name: 1 for name in names},
        metas={},
        values={"descriptors": [stored_descriptor], "currentIndex": 0})
    settings = {"context_settings": [context]}

    widget = self.create_widget(OWFeatureConstructor, settings)
    self.send_signal(widget.Inputs.data, data)

    restored = widget.descriptors[0]
    self.assertEqual(restored.expression, "int(ana) + int(berta)")
423+
377424

378425
class TestFeatureEditor(unittest.TestCase):
379426
def test_has_functions(self):

doc/visual-programming/source/widgets/data/featureconstructor.md

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ Add new features to your dataset.
1111

1212
- Data: dataset with additional features
1313

14-
The **Feature Constructor** allows you to manually add features (columns) into your dataset. The new feature can be a computation of an existing one or a combination of several (addition, subtraction, etc.). You can choose what type of feature it will be (discrete, continuous or string) and what its parameters are (name, value, expression). For continuous variables you only have to construct an expression in Python.
14+
The **Feature Constructor** allows you to manually add features (columns) into your dataset. The new feature can be a computation of an existing one or a combination of several (addition, subtraction, etc.). You can choose what type of feature it will be (categorical, numeric or text) and what its parameters are (name, value, expression). For numeric variables you only have to construct an expression in Python.
1515

1616
![](images/feature-constructor1-stamped.png)
1717

@@ -24,7 +24,9 @@ The **Feature Constructor** allows you to manually add features (columns) into y
2424
7. Produce a report
2525
8. Press *Send* to communicate changes
2626

27-
For discrete variables, however, there's a bit more work. First add or remove the values you want for the new feature. Then select the base value and the expression. In the example below, we have constructed an expression with 'if lower than' and defined three conditions; the program ascribes 0 (which we renamed to lower) if the original value is lower than 6, 1 (mid) if it is lower than 7 and 2 (higher) for all the other values. Notice that we use an underscore for the feature name (e.g. petal\_length).
27+
For categorical variables, the expression can return either strings or indices into a list of possible values. In the example below, we defined three values, "lower", "mid" and "higher". The expression must then return zero-based indices into that list. Here, we have constructed an expression which returns 0 (referring to "lower") if the value of sepal_length is below 6, 1 ("mid") if it is lower than 7 and 2 ("higher") otherwise. Notice that we replace spaces in the variable name with underscores (e.g. petal\_length).
28+
29+
When the expression returns strings, the list of values must be left empty.
2830

2931
![](images/feature-constructor2-stamped.png)
3032

@@ -38,6 +40,8 @@ For discrete variables, however, there's a bit more work. First add or remove th
3840
8. Produce a report
3941
9. Press *Send* to communicate changes
4042

43+
**Note**: before version 3.30, categorical values in expressions evaluated to indices into their lists of values; e.g., if a feature had values "red", "yellow" and "green", a value of "yellow" would evaluate to 2. This has been changed: categorical values are now evaluated as strings. The previous behaviour was unstable and can no longer be achieved in Feature Constructor.
44+
4145
Example
4246
-------
4347

0 commit comments

Comments
 (0)