Skip to content

Commit 711c3c0

Browse files
committed
Create class: Add report, improve GUI, other minor fixes
1 parent 1521c04 commit 711c3c0

File tree

2 files changed

+146
-34
lines changed

2 files changed

+146
-34
lines changed

Orange/widgets/data/owcreateclass.py

Lines changed: 117 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
11
"""Widget for creating classes from non-numeric attribute by substrings"""
2+
import re
3+
from itertools import count
4+
25
import numpy as np
36

47
from AnyQt.QtWidgets import QGridLayout, QLabel, QLineEdit, QSizePolicy
@@ -133,6 +136,7 @@ class OWCreateClass(widget.OWWidget):
133136

134137
settingsHandler = DomainContextHandler()
135138
attribute = ContextSetting(None)
139+
class_name = ContextSetting("class")
136140
rules = ContextSetting({})
137141
match_beginning = ContextSetting(False)
138142
case_sensitive = ContextSetting(False)
@@ -147,7 +151,7 @@ def __init__(self):
147151
super().__init__()
148152
self.data = None
149153

150-
# The following lists are of the same length as self.activeRules
154+
# The following lists are of the same length as self.active_rules
151155

152156
#: list of pairs with counts of matches for each pattern when the
153157
# patterns are applied in order and when applied on the entire set,
@@ -161,31 +165,39 @@ def __init__(self):
161165
#: list of list of QLabel: pairs of labels with counts
162166
self.counts = []
163167

164-
patternbox = gui.vBox(self.controlArea, box="Patterns")
165-
box = gui.hBox(patternbox)
166-
gui.widgetLabel(box, "Class from column: ", addSpace=12)
167-
gui.comboBox(
168-
box, self, "attribute", callback=self.update_rules,
169-
model=DomainModel(valid_types=(StringVariable, DiscreteVariable)),
170-
sizePolicy=(QSizePolicy.MinimumExpanding, QSizePolicy.Fixed))
168+
combo = gui.comboBox(
169+
self.controlArea, self, "attribute", label="From column: ",
170+
box=True, orientation=Qt.Horizontal, callback=self.update_rules,
171+
model=DomainModel(valid_types=(StringVariable, DiscreteVariable)))
172+
# Don't use setSizePolicy keyword argument here: it applies to box,
173+
# not the combo
174+
combo.setSizePolicy(QSizePolicy.MinimumExpanding, QSizePolicy.Fixed)
171175

176+
patternbox = gui.vBox(self.controlArea, box=True)
172177
#: QWidget: the box that contains the remove buttons, line edits and
173178
# count labels. The lines are added and removed dynamically.
174179
self.rules_box = rules_box = QGridLayout()
175180
patternbox.layout().addLayout(self.rules_box)
176-
self.add_button = gui.button(None, self, "+", flat=True,
177-
callback=self.add_row, autoDefault=False,
178-
minimumSize=QSize(12, 20))
179-
self.rules_box.setColumnMinimumWidth(1, 80)
181+
box = gui.hBox(patternbox)
182+
gui.button(
183+
box, self, "+", callback=self.add_row, autoDefault=False, flat=True,
184+
minimumSize=(QSize(20, 20)))
185+
gui.rubber(box)
186+
self.rules_box.setColumnMinimumWidth(1, 70)
180187
self.rules_box.setColumnMinimumWidth(0, 10)
181188
self.rules_box.setColumnStretch(0, 1)
182189
self.rules_box.setColumnStretch(1, 1)
183190
self.rules_box.setColumnStretch(2, 100)
184191
rules_box.addWidget(QLabel("Name"), 0, 1)
185-
rules_box.addWidget(QLabel("Pattern"), 0, 2)
192+
rules_box.addWidget(QLabel("Substring"), 0, 2)
186193
rules_box.addWidget(QLabel("#Instances"), 0, 3, 1, 2)
187194
self.update_rules()
188195

196+
gui.lineEdit(
197+
self.controlArea, self, "class_name",
198+
label="Name for the new class:",
199+
box=True, orientation=Qt.Horizontal)
200+
189201
optionsbox = gui.vBox(self.controlArea, box=True)
190202
gui.checkBox(
191203
optionsbox, self, "match_beginning", "Match only at the beginning",
@@ -194,9 +206,14 @@ def __init__(self):
194206
optionsbox, self, "case_sensitive", "Case sensitive",
195207
callback=self.options_changed)
196208

197-
box = gui.hBox(self.controlArea)
198-
gui.rubber(box)
199-
gui.button(box, self, "Apply", autoDefault=False, callback=self.apply)
209+
layout = QGridLayout()
210+
gui.widgetBox(self.controlArea, orientation=layout)
211+
for i in range(3):
212+
layout.setColumnStretch(i, 1)
213+
layout.addWidget(self.report_button, 0, 0)
214+
apply = gui.button(None, self, "Apply", autoDefault=False,
215+
callback=self.apply)
216+
layout.addWidget(apply, 0, 2)
200217

201218
# TODO: Resizing upon changing the number of rules does not work
202219
self.setSizePolicy(QSizePolicy.Preferred, QSizePolicy.Maximum)
@@ -209,7 +226,7 @@ def active_rules(self):
209226
set the default.
210227
"""
211228
return self.rules.setdefault(self.attribute and self.attribute.name,
212-
[["C1", ""], ["C2", ""]])
229+
[["", ""], ["", ""]])
213230

214231
def rules_to_edits(self):
215232
"""Fill the line edits with the rules from the current settings."""
@@ -293,26 +310,39 @@ def _fix_tab_order():
293310
_add_line()
294311
while len(self.line_edits) > n:
295312
_remove_line()
296-
self.rules_box.addWidget(self.add_button, n + 1, 0)
297313
_fix_tab_order()
298314

299315
def add_row(self):
300316
"""Append a new row at the end."""
301317
self.active_rules.append(["", ""])
302318
self.adjust_n_rule_rows()
319+
self.update_counts()
303320

304321
def remove_row(self):
305322
"""Remove a row."""
306323
remove_idx = self.remove_buttons.index(self.sender())
307324
del self.active_rules[remove_idx]
308325
self.update_rules()
326+
self.update_counts()
309327

310328
def sync_edit(self, text):
311329
"""Handle changes in line edits: update the active rules and counts"""
312330
edit = self.sender()
313331
edit.row[edit.col_idx] = text
314332
self.update_counts()
315333

334+
def class_labels(self):
    """Construct the list of class labels, one for each row of line edits.

    Labels entered by the user are kept unchanged. Empty labels are
    replaced with generated names C1, C2, C3, ... If a label of the form
    C<n> already appears among the active rules, the generated names start
    at C<n+1> (for the largest such n), so they do not collide with it.

    Returns:
        list of str: labels in the same order as ``self.line_edits``.
    """
    # Only labels that are exactly "C" followed by digits may be parsed as
    # numbers. A prefix match would also accept e.g. "C1a", for which
    # int("1a") raises ValueError.
    largest_c = max(
        (int(label[1:]) for label, _ in self.active_rules
         if re.fullmatch(r"C\d+", label)),
        default=0)
    class_count = count(largest_c + 1)
    # next() is evaluated only for empty edits, so the generated numbers
    # are consecutive regardless of how many labels the user filled in.
    return [label_edit.text() or "C{}".format(next(class_count))
            for label_edit, _ in self.line_edits]
345+
316346
def update_counts(self):
317347
"""Recompute and update the counts of matches."""
318348
def _matcher(strings, pattern):
@@ -375,12 +405,33 @@ def _clear_labels():
375405

376406
def _set_labels():
377407
"""Set the labels to show the counts"""
378-
for (n_matched, n_total), (lab_matched, lab_total) in \
379-
zip(self.match_counts, self.counts):
408+
for (n_matched, n_total), (lab_matched, lab_total), (lab, patt) in \
409+
zip(self.match_counts, self.counts, self.active_rules):
380410
n_before = n_total - n_matched
381411
lab_matched.setText("{}".format(n_matched))
382-
if n_before:
412+
if n_before and (lab or patt):
383413
lab_total.setText("+ {}".format(n_before))
414+
if n_matched:
415+
tip = "{} of the {} matching instances are already " \
416+
"covered above".format(n_before, n_total)
417+
else:
418+
tip = "All matching instances are already covered above"
419+
lab_total.setToolTip(tip)
420+
lab_matched.setToolTip(tip)
421+
422+
def _set_placeholders():
423+
"""Set placeholders for empty edit lines"""
424+
matches = [n for n, _ in self.match_counts] + \
425+
[0] * len(self.line_edits)
426+
for n_matched, (_, patt) in zip(matches, self.line_edits):
427+
if not patt.text():
428+
patt.setPlaceholderText(
429+
"(remaining instances)" if n_matched else "(unused)")
430+
431+
labels = self.class_labels()
432+
for label, (lab_edit, _) in zip(labels, self.line_edits):
433+
if not lab_edit.text():
434+
lab_edit.setPlaceholderText(label)
384435

385436
_clear_labels()
386437
attr = self.attribute
@@ -392,27 +443,67 @@ def _set_labels():
392443
self.match_counts = [[int(np.sum(x)) for x in matches]
393444
for matches in counters[type(attr)]()]
394445
_set_labels()
446+
_set_placeholders()
395447

396448
def apply(self):
397449
"""Output the transformed data."""
398-
if not self.attribute or not self.active_rules:
450+
if not self.attribute:
399451
self.send("Data", None)
400452
return
401453
domain = self.data.domain
454+
rules = self.active_rules
402455
# Transposition + stripping
403-
names, patterns = \
404-
zip(*((name.strip(), pattern)
405-
for name, pattern in self.active_rules if name.strip()))
456+
valid_rules = [label or pattern or n_matches
457+
for (label, pattern), n_matches in
458+
zip(rules, self.match_counts)]
459+
patterns = [pattern
460+
for (_, pattern), valid in zip(rules, valid_rules)
461+
if valid]
462+
names = [name for name, valid in zip(self.class_labels(), valid_rules)
463+
if valid]
406464
transformer = self.TRANSFORMERS[type(self.attribute)]
407465
compute_value = transformer(
408466
self.attribute, patterns, self.case_sensitive, self.match_beginning)
409467
new_class = DiscreteVariable(
410-
"class", names, compute_value=compute_value)
468+
self.class_name, names, compute_value=compute_value)
411469
new_domain = Domain(
412470
domain.attributes, new_class, domain.metas + domain.class_vars)
413471
new_data = Table(new_domain, self.data)
414472
self.send("Data", new_data)
415473

474+
def send_report(self):
    """Add a description of the current rules to the report.

    Reports the source attribute and, if at least one rule is in use, the
    name of the new class followed by an ordered list with one item per
    rule: the class label, its condition and the number of matching
    instances. Nothing is reported when no attribute is selected.
    """
    # Labels, patterns and the column name are free text; escape them so
    # that characters such as "<" or "&" do not corrupt the raw HTML.
    from html import escape

    def _cond_part(class_name, patt):
        """Describe which instances get the label `class_name`."""
        rule = "<b>{}</b> ".format(escape(class_name))
        if patt:
            rule += "if <b>{}</b> contains <b>{}</b>".format(
                escape(self.attribute.name), escape(patt))
        else:
            rule += "otherwise"
        return rule

    def _count_part(n_matched, n_total, patt):
        """Describe how many instances the rule matches."""
        if not n_matched:
            return "all {} matching instances are already covered " \
                   "above".format(n_total)
        if n_matched < n_total and patt:
            return "{} matching instances (+ {} that are already " \
                   "covered above)".format(n_matched, n_total - n_matched)
        return "{} matching instances".format(n_matched)

    if not self.attribute:
        return
    self.report_items("Input", [("Source attribute", self.attribute.name)])
    names = self.class_labels()
    # A rule is listed if the user filled in any of its fields or if it
    # matches some instances.
    output = "".join(
        "<li>{}; {}</li>".format(
            _cond_part(class_name, patt),
            _count_part(n_matched, n_total, patt))
        for (n_matched, n_total), class_name, (lab, patt)
        in zip(self.match_counts, names, self.active_rules)
        if lab or patt or n_total)
    if output:
        self.report_items("Output", [("Class name", self.class_name)])
        self.report_raw("<ol>{}</ol>".format(output))
506+
416507

417508
def main(): # pragma: no cover
418509
"""Simple test for manual inspection of the widget"""

Orange/widgets/data/tests/test_owcreateclass.py

Lines changed: 29 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -130,9 +130,9 @@ def setUp(self):
130130
self.no_attributes = Table("iris")[:, :4]
131131

132132
def _test_default_rules(self):
133-
self.assertEqual(self.widget.active_rules, [["C1", ""], ["C2", ""]])
133+
self.assertEqual(self.widget.active_rules, [["", ""], ["", ""]])
134134
for i, (label, pattern) in enumerate(self.widget.line_edits):
135-
self.assertEqual(label.text(), "C{}".format(i + 1))
135+
self.assertEqual(label.text(), "".format(i + 1))
136136
self.assertEqual(pattern.text(), "")
137137

138138
def _set_attr(self, attr):
@@ -181,7 +181,7 @@ def test_string_data(self):
181181
self._set_attr(self.zoo.domain.metas[0])
182182
widget.line_edits[0][1].setText("a")
183183

184-
self._check_counts([["54", ""], ["47", "+ 54"]])
184+
self._check_counts([["54", ""], ["47", ""]])
185185

186186
widget.apply()
187187
outdata = self.get_output("Data")
@@ -229,7 +229,7 @@ def test_flow_and_context_handling(self):
229229

230230
widget.apply()
231231
outdata = self.get_output("Data")
232-
self.assertEqual(outdata.domain.class_var.values, ["C1", "C2"])
232+
self.assertEqual(outdata.domain.class_var.values, ["C1"])
233233
classes = outdata.get_column_view("class")[0]
234234
np.testing.assert_equal(classes, 0)
235235

@@ -247,6 +247,8 @@ def test_flow_and_context_handling(self):
247247
self._test_default_rules()
248248

249249
widget.line_edits[0][1].setText("ema")
250+
self._check_counts([["97", ""], ["206", ""]])
251+
widget.line_edits[1][1].setText("ma")
250252
self._check_counts([["97", ""], ["206", "+ 97"]])
251253

252254
widget.apply()
@@ -286,15 +288,15 @@ def test_add_remove_lines(self):
286288
widget = self.widget
287289
self.send_signal("Data", self.heart)
288290
self._set_thal()
289-
widget.add_button.click()
291+
widget.add_row()
290292
self.assertEqual(len(widget.line_edits), 3)
291293
widget.line_edits[2][0].setText("Cls3")
292294
widget.line_edits[2][1].setText("a")
293295
# Observing counts suffices to deduce that rules are set correctly
294296
self._check_counts([["117", ""], ["18", "+ 117"], ["166", "+ 117"]])
295297

296-
widget.add_button.click()
297-
widget.add_button.click()
298+
widget.add_row()
299+
widget.add_row()
298300
widget.line_edits[3][1].setText("c")
299301
widget.line_edits[4][1].setText("b")
300302
widget.apply()
@@ -321,7 +323,8 @@ def test_add_remove_lines(self):
321323
widget.remove_buttons[0].click()
322324
widget.apply()
323325
outdata = self.get_output("Data")
324-
self.assertIsNone(outdata)
326+
np.testing.assert_equal(self.heart.X, outdata.X)
327+
self.assertTrue(np.all(np.isnan(outdata.Y)))
325328

326329
def test_options(self):
327330
def _transformer_flags():
@@ -339,5 +342,23 @@ def _transformer_flags():
339342
widget.controls.match_beginning.click()
340343
self.assertEqual(_transformer_flags(), (False, True))
341344

345+
def test_report(self):
346+
"""Report does not crash"""
347+
widget = self.widget
348+
widget.send_report()
349+
350+
self.send_signal("Data", self.heart)
351+
thal = self.heart.domain["thal"]
352+
self._set_attr(thal)
353+
widget.line_edits[0][0].setText("Cls3")
354+
widget.line_edits[0][1].setText("a")
355+
widget.send_report()
356+
357+
widget.line_edits[1][1].setText("b")
358+
widget.send_report()
359+
360+
widget.line_edits[1][1].setText("c")
361+
widget.send_report()
362+
342363
if __name__ == "__main__":
343364
unittest.main()

0 commit comments

Comments
 (0)