@@ -350,14 +350,23 @@ def test_invalidation_flow(self):
350
350
# set global structure "on" (after the embedding is computed)
351
351
w .controls .multiscale .setChecked (False )
352
352
self .send_signal (w .Inputs .data , self .data )
353
+
354
+ # By default, t-SNE is smart and disables PCA preprocessing if the
355
+ # number of features is too low. Since we are testing with the iris
356
+ # data set, we want to force t-SNE to use PCA preprocessing.
357
+ w .controls .use_pca_preprocessing .setChecked (True )
358
+ self .widget .run_button .click ()
359
+
353
360
self .wait_until_finished ()
354
361
self .assertFalse (self .widget .Information .modified .is_shown ())
355
362
# All the embedding components should be computed
363
+ self .assertIsNotNone (w .preprocessed_data )
356
364
self .assertIsNotNone (w .normalized_data )
357
365
self .assertIsNotNone (w .pca_projection )
358
366
self .assertIsNotNone (w .affinities )
359
367
self .assertIsNotNone (w .tsne_embedding )
360
368
# All the invalidation flags should be set to false
369
+ self .assertFalse (w ._invalidated .preprocessed_data )
361
370
self .assertFalse (w ._invalidated .normalized_data )
362
371
self .assertFalse (w ._invalidated .pca_projection )
363
372
self .assertFalse (w ._invalidated .affinities )
@@ -368,13 +377,15 @@ def test_invalidation_flow(self):
368
377
self .assertTrue (self .widget .Information .modified .is_shown ())
369
378
# Setting `multiscale` to true should set the invalidate flags for
370
379
# the affinities and embedding, but not the pca_projection
380
+ self .assertFalse (w ._invalidated .preprocessed_data )
371
381
self .assertFalse (w ._invalidated .normalized_data )
372
382
self .assertFalse (w ._invalidated .pca_projection )
373
383
self .assertTrue (w ._invalidated .affinities )
374
384
self .assertTrue (w ._invalidated .tsne_embedding )
375
385
376
386
# The flags should now be set, but the embedding should still be
377
387
# available when selecting a subset of data and such
388
+ self .assertIsNotNone (w .preprocessed_data )
378
389
self .assertIsNotNone (w .normalized_data )
379
390
self .assertIsNotNone (w .pca_projection )
380
391
self .assertIsNotNone (w .affinities )
@@ -472,6 +483,9 @@ def test_distance_matrix_not_symmetric(self):
472
483
self .send_signal (w .Inputs .distances , DistMatrix ([[1 , 2 , 3 ], [4 , 5 , 6 ]]))
473
484
self .assertTrue (w .Error .distance_matrix_not_symmetric .is_shown ())
474
485
486
+ self .send_signal (w .Inputs .distances , DistMatrix ([[1 , 2 , 3 ], [4 , 5 , 6 ], [7 , 8 , 9 ]]))
487
+ self .assertTrue (w .Error .distance_matrix_not_symmetric .is_shown ())
488
+
475
489
self .send_signal (w .Inputs .distances , None )
476
490
self .assertFalse (w .Error .distance_matrix_not_symmetric .is_shown ())
477
491
@@ -813,6 +827,64 @@ def test_controls_ignored_by_distance_matrix_retain_values_on_table_signal(self)
813
827
self .assertTrue (w .perplexity_spin .isEnabled ())
814
828
self .assertEqual (w .perplexity_spin .value (), 42 )
815
829
830
def test_controls_are_properly_disabled_with_sparse_matrix(self):
    """Check control enabled-states for sparse input, then for dense input.

    With a sparse table on the data input, the ``normalize`` control must
    be disabled while PCA preprocessing, distance metric and
    initialization method stay enabled. Once a dense table is sent, every
    one of these controls must be enabled.
    """
    w = self.widget

    def is_enabled(control_name):
        # Look the control up by name on the widget's controls namespace
        return getattr(w.controls, control_name).isEnabled()

    # Normalizing a sparse matrix is disabled, since this would require
    # centering
    off_when_sparse = ("normalize",)
    # PCA preprocessing and the supported distance metrics are enabled for
    # sparse matrices
    always_on = (
        "use_pca_preprocessing",
        "distance_metric_idx",
        "initialization_method_idx",
    )

    # Sparse input
    self.send_signal(w.Inputs.data, self.iris.to_sparse())
    self.wait_until_finished()

    for control_name in off_when_sparse:
        self.assertFalse(is_enabled(control_name))
    for control_name in always_on:
        self.assertTrue(is_enabled(control_name))

    # Sending a dense table should re-enable the disabled fields
    self.send_signal(w.Inputs.data, self.iris)
    self.wait_until_finished()

    for control_name in off_when_sparse + always_on:
        self.assertTrue(is_enabled(control_name))
858
+
859
def test_data_containing_nans(self):
    """Smoke-test the widget on data with missing values.

    A 150 x 50 table of normally distributed values with a few NaNs is
    sent to the widget, and the embedding is re-run for each combination
    of the ``normalize`` and ``use_pca_preprocessing`` check boxes. The
    test makes no assertion about the result of the runs; it only
    requires each of them to finish.
    """
    # Use a seeded, local generator so the NaN positions are the same on
    # every run and a failure can be reproduced.
    rng = np.random.RandomState(0)
    x = rng.normal(0, 1, size=(150, 50))
    # Sprinkle a few NaNs into the matrix (positions may repeat, so there
    # can be slightly fewer than `num_nans` missing values)
    num_nans = 20
    x[rng.randint(0, 150, num_nans), rng.randint(0, 50, num_nans)] = np.nan

    nan_data = Table.from_numpy(Domain.from_numpy(x), x)

    w = self.widget

    # Both preprocessing options are expected to be checked for this input
    self.send_signal(w.Inputs.data, nan_data)
    self.assertTrue(w.controls.normalize.isChecked())
    self.assertTrue(w.controls.use_pca_preprocessing.isChecked())
    w.run_button.click()
    self.wait_until_finished()

    # Remaining combinations, in order: only normalization disabled, only
    # PCA preprocessing disabled, both disabled
    for normalize, use_pca in ((False, True), (True, False), (False, False)):
        w.controls.normalize.setChecked(normalize)
        w.controls.use_pca_preprocessing.setChecked(use_pca)
        w.run_button.click()
        self.wait_until_finished()
887
+
816
888
817
889
class TestTSNERunner (unittest .TestCase ):
818
890
@classmethod
@@ -834,8 +906,9 @@ def test_run_with_normalization_and_pca_preprocessing(self):
834
906
)
835
907
task = TSNERunner .run (task , state )
836
908
837
- self .assertEqual (len (state .set_status .mock_calls ), 5 )
909
+ self .assertEqual (len (state .set_status .mock_calls ), 6 )
838
910
state .set_status .assert_has_calls ([
911
+ call ("Preprocessing data..." ),
839
912
call ("Normalizing data..." ),
840
913
call ("Computing PCA..." ),
841
914
call ("Finding nearest neighbors..." ),
@@ -862,8 +935,9 @@ def test_run_with_normalization(self):
862
935
)
863
936
task = TSNERunner .run (task , state )
864
937
865
- self .assertEqual (len (state .set_status .mock_calls ), 4 )
938
+ self .assertEqual (len (state .set_status .mock_calls ), 5 )
866
939
state .set_status .assert_has_calls ([
940
+ call ("Preprocessing data..." ),
867
941
call ("Normalizing data..." ),
868
942
call ("Finding nearest neighbors..." ),
869
943
call ("Preparing initialization..." ),
@@ -890,8 +964,9 @@ def test_run_with_pca_preprocessing(self):
890
964
)
891
965
task = TSNERunner .run (task , state )
892
966
893
- self .assertEqual (len (state .set_status .mock_calls ), 4 )
967
+ self .assertEqual (len (state .set_status .mock_calls ), 5 )
894
968
state .set_status .assert_has_calls ([
969
+ call ("Preprocessing data..." ),
895
970
call ("Computing PCA..." ),
896
971
call ("Finding nearest neighbors..." ),
897
972
call ("Preparing initialization..." ),
@@ -949,7 +1024,6 @@ def test_run_with_distance_matrix(self):
949
1024
task = Task (
950
1025
normalize = False ,
951
1026
use_pca_preprocessing = False ,
952
- # data=self.data,
953
1027
distance_matrix = self .distances ,
954
1028
perplexity = 30 ,
955
1029
initialization_method = "spectral" ,
@@ -1064,6 +1138,34 @@ def test_run_with_distance_matrix_ignores_preprocessing(self):
1064
1138
self .assertIsInstance (task .tsne , TSNE )
1065
1139
self .assertIsInstance (task .tsne_embedding , TSNEModel )
1066
1140
1141
def test_run_with_sparse_matrix_ignores_normalization(self):
    """Run the t-SNE runner on sparse data with PCA but no normalization.

    Checks that exactly five status messages are reported (preprocessing,
    PCA, nearest neighbors, initialization, optimization), that no
    normalized data is produced, and that the PCA projection,
    initialization, t-SNE object and embedding have the expected types.

    NOTE(review): the task is created with ``normalize=False``, so this
    only shows that normalization is not run when it is not requested.
    Confirm whether ``normalize=True`` was intended, given the test name.
    """
    state = Mock(is_interruption_requested=Mock(return_value=False))

    sparse_task = Task(
        normalize=False,
        use_pca_preprocessing=True,
        data=self.data.to_sparse(),
        perplexity=30,
        initialization_method="spectral",
        distance_metric="cosine",
    )
    result = TSNERunner.run(sparse_task, state)

    # No "Normalizing data..." step is expected in the reported statuses
    expected_statuses = [
        call("Preprocessing data..."),
        call("Computing PCA..."),
        call("Finding nearest neighbors..."),
        call("Preparing initialization..."),
        call("Running optimization..."),
    ]
    self.assertEqual(len(state.set_status.mock_calls), len(expected_statuses))
    state.set_status.assert_has_calls(expected_statuses)

    self.assertIsNone(result.normalized_data)
    self.assertIsInstance(result.pca_projection, Table)
    self.assertIsInstance(result.initialization, np.ndarray)
    self.assertIsInstance(result.tsne, TSNE)
    self.assertIsInstance(result.tsne_embedding, TSNEModel)
1168
+
1067
1169
1068
1170
if __name__ == "__main__" :
1069
1171
unittest .main ()
0 commit comments