1
1
"""Widget for creating classes from non-numeric attribute by substrings"""
2
+ import re
3
+ from itertools import count
4
+
2
5
import numpy as np
3
6
4
7
from AnyQt .QtWidgets import QGridLayout , QLabel , QLineEdit , QSizePolicy
@@ -133,6 +136,7 @@ class OWCreateClass(widget.OWWidget):
133
136
134
137
settingsHandler = DomainContextHandler ()
135
138
attribute = ContextSetting (None )
139
+ class_name = ContextSetting ("class" )
136
140
rules = ContextSetting ({})
137
141
match_beginning = ContextSetting (False )
138
142
case_sensitive = ContextSetting (False )
@@ -147,7 +151,7 @@ def __init__(self):
147
151
super ().__init__ ()
148
152
self .data = None
149
153
150
- # The following lists are of the same length as self.activeRules
154
+ # The following lists are of the same length as self.active_rules
151
155
152
156
#: list of pairs with counts of matches for each pattern when the
153
157
# patterns are applied in order and when applied on the entire set,
@@ -161,31 +165,39 @@ def __init__(self):
161
165
#: list of list of QLabel: pairs of labels with counts
162
166
self .counts = []
163
167
164
- patternbox = gui .vBox ( self . controlArea , box = "Patterns" )
165
- box = gui . hBox ( patternbox )
166
- gui . widgetLabel ( box , "Class from column: " , addSpace = 12 )
167
- gui . comboBox (
168
- box , self , "attribute" , callback = self . update_rules ,
169
- model = DomainModel ( valid_types = ( StringVariable , DiscreteVariable )),
170
- sizePolicy = (QSizePolicy .MinimumExpanding , QSizePolicy .Fixed ) )
168
+ combo = gui .comboBox (
169
+ self . controlArea , self , "attribute" , label = "From column: " ,
170
+ box = True , orientation = Qt . Horizontal , callback = self . update_rules ,
171
+ model = DomainModel ( valid_types = ( StringVariable , DiscreteVariable )))
172
+ # Don't use the sizePolicy keyword argument here: it applies to the box,
173
+ # not the combo
174
+ combo . setSizePolicy (QSizePolicy .MinimumExpanding , QSizePolicy .Fixed )
171
175
176
+ patternbox = gui .vBox (self .controlArea , box = True )
172
177
#: QWidget: the box that contains the remove buttons, line edits and
173
178
# count labels. The lines are added and removed dynamically.
174
179
self .rules_box = rules_box = QGridLayout ()
175
180
patternbox .layout ().addLayout (self .rules_box )
176
- self .add_button = gui .button (None , self , "+" , flat = True ,
177
- callback = self .add_row , autoDefault = False ,
178
- minimumSize = QSize (12 , 20 ))
179
- self .rules_box .setColumnMinimumWidth (1 , 80 )
181
+ box = gui .hBox (patternbox )
182
+ gui .button (
183
+ box , self , "+" , callback = self .add_row , autoDefault = False , flat = True ,
184
+ minimumSize = (QSize (20 , 20 )))
185
+ gui .rubber (box )
186
+ self .rules_box .setColumnMinimumWidth (1 , 70 )
180
187
self .rules_box .setColumnMinimumWidth (0 , 10 )
181
188
self .rules_box .setColumnStretch (0 , 1 )
182
189
self .rules_box .setColumnStretch (1 , 1 )
183
190
self .rules_box .setColumnStretch (2 , 100 )
184
191
rules_box .addWidget (QLabel ("Name" ), 0 , 1 )
185
- rules_box .addWidget (QLabel ("Pattern " ), 0 , 2 )
192
+ rules_box .addWidget (QLabel ("Substring " ), 0 , 2 )
186
193
rules_box .addWidget (QLabel ("#Instances" ), 0 , 3 , 1 , 2 )
187
194
self .update_rules ()
188
195
196
+ gui .lineEdit (
197
+ self .controlArea , self , "class_name" ,
198
+ label = "Name for the new class:" ,
199
+ box = True , orientation = Qt .Horizontal )
200
+
189
201
optionsbox = gui .vBox (self .controlArea , box = True )
190
202
gui .checkBox (
191
203
optionsbox , self , "match_beginning" , "Match only at the beginning" ,
@@ -194,9 +206,14 @@ def __init__(self):
194
206
optionsbox , self , "case_sensitive" , "Case sensitive" ,
195
207
callback = self .options_changed )
196
208
197
- box = gui .hBox (self .controlArea )
198
- gui .rubber (box )
199
- gui .button (box , self , "Apply" , autoDefault = False , callback = self .apply )
209
+ layout = QGridLayout ()
210
+ gui .widgetBox (self .controlArea , orientation = layout )
211
+ for i in range (3 ):
212
+ layout .setColumnStretch (i , 1 )
213
+ layout .addWidget (self .report_button , 0 , 0 )
214
+ apply = gui .button (None , self , "Apply" , autoDefault = False ,
215
+ callback = self .apply )
216
+ layout .addWidget (apply , 0 , 2 )
200
217
201
218
# TODO: Resizing upon changing the number of rules does not work
202
219
self .setSizePolicy (QSizePolicy .Preferred , QSizePolicy .Maximum )
@@ -209,7 +226,7 @@ def active_rules(self):
209
226
set the default.
210
227
"""
211
228
return self .rules .setdefault (self .attribute and self .attribute .name ,
212
- [["C1 " , "" ], ["C2 " , "" ]])
229
+ [["" , "" ], ["" , "" ]])
213
230
214
231
def rules_to_edits (self ):
215
232
"""Fill the line edites with the rules from the current settings."""
@@ -293,26 +310,39 @@ def _fix_tab_order():
293
310
_add_line ()
294
311
while len (self .line_edits ) > n :
295
312
_remove_line ()
296
- self .rules_box .addWidget (self .add_button , n + 1 , 0 )
297
313
_fix_tab_order ()
298
314
299
315
def add_row(self):
    """Add a blank rule (empty name, empty substring) after the last one.

    The rule rows are then adjusted to the new number of rules and the
    match counts are recomputed.
    """
    blank_rule = [""] * 2
    self.active_rules.append(blank_rule)
    self.adjust_n_rule_rows()
    self.update_counts()
303
320
304
321
def remove_row(self):
    """Delete the rule whose remove button triggered this call."""
    clicked = self.sender()
    # Remove buttons and rules are kept in the same order, so the position
    # of the button is also the position of the rule.
    position = self.remove_buttons.index(clicked)
    self.active_rules.pop(position)
    self.update_rules()
    self.update_counts()
309
327
310
328
def sync_edit(self, text):
    """Store the new text of the edited line edit and refresh the counts.

    The sending line edit carries the rule it belongs to (``row``) and the
    index of the field it edits within that rule (``col_idx``).
    """
    source = self.sender()
    rule, column = source.row, source.col_idx
    rule[column] = text
    self.update_counts()
315
333
334
def class_labels(self):
    """Construct the list of class labels, one per rule row.

    Labels typed by the user are kept as they are. Empty labels are
    replaced with C1, C2, C3... If a label of the exact form C<n> already
    appears among the user's labels, the generated labels start at
    C<largest n + 1> instead, so they cannot collide with it.

    Returns:
        list of str: a label for every pair of line edits.
    """
    # Only labels that are *entirely* "C" followed by digits take part in
    # the numbering. A prefix match would also accept e.g. "C12abc", for
    # which int("12abc") raises ValueError.
    used_numbers = (int(label[1:]) for label, _ in self.active_rules
                    if re.fullmatch(r"C\d+", label))
    numbering = count(max(used_numbers, default=0) + 1)
    return [label_edit.text() or "C{}".format(next(numbering))
            for label_edit, _ in self.line_edits]
345
+
316
346
def update_counts (self ):
317
347
"""Recompute and update the counts of matches."""
318
348
def _matcher (strings , pattern ):
@@ -375,12 +405,33 @@ def _clear_labels():
375
405
376
406
def _set_labels ():
377
407
"""Set the labels to show the counts"""
378
- for (n_matched , n_total ), (lab_matched , lab_total ) in \
379
- zip (self .match_counts , self .counts ):
408
+ for (n_matched , n_total ), (lab_matched , lab_total ), ( lab , patt ) in \
409
+ zip (self .match_counts , self .counts , self . active_rules ):
380
410
n_before = n_total - n_matched
381
411
lab_matched .setText ("{}" .format (n_matched ))
382
- if n_before :
412
+ if n_before and ( lab or patt ) :
383
413
lab_total .setText ("+ {}" .format (n_before ))
414
+ if n_matched :
415
+ tip = "{} of the {} matching instances are already " \
416
+ "covered above" .format (n_before , n_total )
417
+ else :
418
+ tip = "All matching instances are already covered above"
419
+ lab_total .setToolTip (tip )
420
+ lab_matched .setToolTip (tip )
421
+
422
+ def _set_placeholders ():
423
+ """Set placeholders for empty edit lines"""
424
+ matches = [n for n , _ in self .match_counts ] + \
425
+ [0 ] * len (self .line_edits )
426
+ for n_matched , (_ , patt ) in zip (matches , self .line_edits ):
427
+ if not patt .text ():
428
+ patt .setPlaceholderText (
429
+ "(remaining instances)" if n_matched else "(unused)" )
430
+
431
+ labels = self .class_labels ()
432
+ for label , (lab_edit , _ ) in zip (labels , self .line_edits ):
433
+ if not lab_edit .text ():
434
+ lab_edit .setPlaceholderText (label )
384
435
385
436
_clear_labels ()
386
437
attr = self .attribute
@@ -392,27 +443,67 @@ def _set_labels():
392
443
self .match_counts = [[int (np .sum (x )) for x in matches ]
393
444
for matches in counters [type (attr )]()]
394
445
_set_labels ()
446
+ _set_placeholders ()
395
447
396
448
def apply(self):
    """Output the input data with a new class variable built from the rules.

    Sends None when no attribute is selected. Otherwise the new discrete
    class is named `self.class_name`, its values are the labels of the
    rules in use, and the original class variables are moved to metas.
    """
    if not self.attribute:
        self.send("Data", None)
        return
    domain = self.data.domain
    rules = self.active_rules
    # A rule is used if it has a name or a substring or, failing both, if
    # it still receives some of the remaining instances. Each element of
    # match_counts is a pair [n_matched, n_total]; testing the pair itself
    # would always be true, so only n_matched is tested.
    valid_rules = [bool(label or pattern or n_matched)
                   for (label, pattern), (n_matched, _)
                   in zip(rules, self.match_counts)]
    patterns = [pattern
                for (_, pattern), valid in zip(rules, valid_rules)
                if valid]
    names = [name
             for name, valid in zip(self.class_labels(), valid_rules)
             if valid]
    transformer = self.TRANSFORMERS[type(self.attribute)]
    compute_value = transformer(
        self.attribute, patterns, self.case_sensitive, self.match_beginning)
    new_class = DiscreteVariable(
        self.class_name, names, compute_value=compute_value)
    new_domain = Domain(
        domain.attributes, new_class, domain.metas + domain.class_vars)
    new_data = Table(new_domain, self.data)
    self.send("Data", new_data)
415
473
474
def send_report(self):
    """Report the source attribute, the new class name and the rules.

    Nothing is reported when no attribute is selected. Each rule is shown
    as a list item with its condition and its match counts.
    """
    # Names and substrings are typed by the user and end up in raw HTML,
    # so they must be escaped.
    from html import escape

    def _cond_part(class_name, patt):
        """Describe when a rule assigns `class_name`."""
        rule = "<b>{}</b> ".format(escape(class_name))
        if patt:
            rule += "if <b>{}</b> contains <b>{}</b>".format(
                escape(self.attribute.name), escape(patt))
        else:
            rule += "otherwise"
        return rule

    def _count_part(n_matched, n_total, patt):
        """Describe how many instances a rule matches."""
        if not n_matched:
            return "all {} matching instances are already covered " \
                   "above".format(n_total)
        elif n_matched < n_total and patt:
            return "{} matching instances (+ {} that are already " \
                   "covered above)".format(n_matched, n_total - n_matched)
        else:
            return "{} matching instances".format(n_matched)

    if not self.attribute:
        return
    self.report_items("Input", [("Source attribute", self.attribute.name)])
    names = self.class_labels()
    items = [
        "<li>{}; {}</li>".format(_cond_part(class_name, patt),
                                 _count_part(n_matched, n_total, patt))
        for (n_matched, n_total), class_name, (lab, patt)
        in zip(self.match_counts, names, self.active_rules)
        if lab or patt or n_total]
    if items:
        self.report_items("Output", [("Class name", self.class_name)])
        self.report_raw("<ol>{}</ol>".format("".join(items)))
506
+
416
507
417
508
def main (): # pragma: no cover
418
509
"""Simple test for manual inspection of the widget"""
0 commit comments