Skip to content

Commit b175dfa

Browse files
committed
Table.from_table: fix caching with reused ids
In Table.from_table, the id() of objects is used to build cache keys. Because ids can be reused once an object is garbage-collected, a stale cache entry can be returned for a different object. If the keys were not tuples we could use a WeakKeyDictionary for the cache, but because they are, I store weakrefs to the original objects alongside the cached values. A cached value is not used if the objects these weakrefs point to no longer exist.
1 parent c5f5dbb commit b175dfa

File tree

1 file changed

+19
-7
lines changed

1 file changed

+19
-7
lines changed

Orange/data/table.py

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import os
33
import threading
44
import warnings
5+
import weakref
56
import zlib
67
from collections import Iterable, Sequence, Sized
78
from functools import reduce
@@ -316,6 +317,12 @@ def from_table(cls, domain, source, row_indices=...):
316317
:rtype: Orange.data.Table
317318
"""
318319

320+
def valid_refs(weakrefs):
321+
for r in weakrefs:
322+
if r() is None:
323+
return False
324+
return True
325+
319326
def get_columns(row_indices, src_cols, n_rows, dtype=np.float64,
320327
is_sparse=False, variables=[]):
321328
if not len(src_cols):
@@ -356,10 +363,13 @@ def get_columns(row_indices, src_cols, n_rows, dtype=np.float64,
356363
a[:, i] = variables[i].Unknown
357364
elif not isinstance(col, Integral):
358365
if isinstance(col, SharedComputeValue):
359-
if (id(col.compute_shared), id(source)) not in shared_cache:
360-
shared_cache[id(col.compute_shared), id(source)] = \
361-
col.compute_shared(source)
362-
shared = shared_cache[id(col.compute_shared), id(source)]
366+
shared, weakrefs = shared_cache.get((id(col.compute_shared), id(source)),
367+
(None, None))
368+
if shared is None or not valid_refs(weakrefs):
369+
shared, _ = shared_cache[(id(col.compute_shared), id(source))] = \
370+
col.compute_shared(source), \
371+
(weakref.ref(col.compute_shared), weakref.ref(source))
372+
363373
if row_indices is not ...:
364374
a[:, i] = match_density(
365375
col(source, shared_data=shared)[row_indices])
@@ -389,8 +399,9 @@ def get_columns(row_indices, src_cols, n_rows, dtype=np.float64,
389399
if new_cache:
390400
_thread_local.conversion_cache = {}
391401
else:
392-
cached = _thread_local.conversion_cache.get((id(domain), id(source)))
393-
if cached:
402+
cached, weakrefs = \
403+
_thread_local.conversion_cache.get((id(domain), id(source)), (None, None))
404+
if cached and valid_refs(weakrefs):
394405
return cached
395406
if domain is source.domain:
396407
table = cls.from_table_rows(source, row_indices)
@@ -443,7 +454,8 @@ def get_columns(row_indices, src_cols, n_rows, dtype=np.float64,
443454
else:
444455
cls._init_ids(self)
445456
self.attributes = getattr(source, 'attributes', {})
446-
_thread_local.conversion_cache[(id(domain), id(source))] = self
457+
_thread_local.conversion_cache[(id(domain), id(source))] = \
458+
self, (weakref.ref(domain), weakref.ref(source))
447459
return self
448460
finally:
449461
if new_cache:

0 commit comments

Comments
 (0)