Commit f684f86

Refactor enrich_images_unsorted_sync/enrich_images_unsorted_async
1 parent 2f9336d commit f684f86

File tree: 4 files changed, +85 -85 lines

fractal_server/app/routes/api/v2/history.py

Lines changed: 2 additions & 2 deletions
@@ -34,7 +34,7 @@
 from fractal_server.app.schemas.v2 import HistoryUnitStatus
 from fractal_server.app.schemas.v2 import HistoryUnitStatusWithUnset
 from fractal_server.app.schemas.v2 import ImageLogsRequest
-from fractal_server.images.status_tools import enrich_images_async
+from fractal_server.images.status_tools import enrich_images_unsorted_async
 from fractal_server.images.status_tools import IMAGE_STATUS_KEY
 from fractal_server.images.tools import aggregate_attributes
 from fractal_server.images.tools import aggregate_types
@@ -334,7 +334,7 @@ async def get_history_images(
     types = aggregate_types(type_filtered_images)

     # (3) Enrich images with status attribute
-    type_filtered_images_with_status = await enrich_images_async(
+    type_filtered_images_with_status = await enrich_images_unsorted_async(
         dataset_id=dataset_id,
         workflowtask_id=workflowtask_id,
         images=type_filtered_images,

fractal_server/app/routes/api/v2/pre_submission_checks.py

Lines changed: 3 additions & 3 deletions
@@ -14,7 +14,7 @@
 from fractal_server.app.routes.auth import current_active_user
 from fractal_server.app.schemas.v2 import HistoryUnitStatus
 from fractal_server.app.schemas.v2 import TaskType
-from fractal_server.images.status_tools import enrich_images_async
+from fractal_server.images.status_tools import enrich_images_unsorted_async
 from fractal_server.images.status_tools import IMAGE_STATUS_KEY
 from fractal_server.images.tools import aggregate_types
 from fractal_server.images.tools import filter_image_list
@@ -46,7 +46,7 @@ async def verify_unique_types(
         filtered_images = dataset.images
     else:
         if IMAGE_STATUS_KEY in query.attribute_filters.keys():
-            images = await enrich_images_async(
+            images = await enrich_images_unsorted_async(
                 dataset_id=dataset_id,
                 workflowtask_id=workflowtask_id,
                 images=dataset.images,
@@ -134,7 +134,7 @@ async def check_non_processed_images(
         attribute_filters=filters.attribute_filters,
     )

-    filtered_images_with_status = await enrich_images_async(
+    filtered_images_with_status = await enrich_images_unsorted_async(
         dataset_id=dataset_id,
         workflowtask_id=previous_wft.id,
         images=filtered_images,

fractal_server/app/runner/v2/runner.py

Lines changed: 2 additions & 2 deletions
@@ -32,7 +32,7 @@
 from fractal_server.app.schemas.v2 import TaskGroupDumpV2
 from fractal_server.app.schemas.v2 import TaskType
 from fractal_server.images import SingleImage
-from fractal_server.images.status_tools import enrich_images_sync
+from fractal_server.images.status_tools import enrich_images_unsorted_sync
 from fractal_server.images.status_tools import IMAGE_STATUS_KEY
 from fractal_server.images.tools import filter_image_list
 from fractal_server.images.tools import find_image_by_zarr_url
@@ -147,7 +147,7 @@ def execute_tasks_v2(

         if ind_wftask == 0 and ENRICH_IMAGES_WITH_STATUS:
             # FIXME: Could this be done on `type_filtered_images`?
-            tmp_images = enrich_images_sync(
+            tmp_images = enrich_images_unsorted_sync(
                 images=tmp_images,
                 dataset_id=dataset.id,
                 workflowtask_id=wftask.id,

fractal_server/images/status_tools.py

Lines changed: 78 additions & 78 deletions
@@ -10,15 +10,18 @@
 from fractal_server.app.models.v2 import HistoryUnit
 from fractal_server.app.schemas.v2 import HistoryUnitStatusWithUnset
 from fractal_server.logger import set_logger
-from fractal_server.types import ImageAttributeValue

 logger = set_logger(__name__)


 IMAGE_STATUS_KEY = "__wftask_dataset_image_status__"


-def _enriched_image(*, img: dict[str, Any], status: str) -> dict[str, Any]:
+def _enriched_image(
+    *,
+    img: dict[str, Any],
+    status: str,
+) -> dict[str, Any]:
     return img | {
         "attributes": (img["attributes"] | {IMAGE_STATUS_KEY: status})
     }
@@ -29,6 +32,9 @@ def _prepare_query(
     dataset_id: int,
     workflowtask_id: int,
 ) -> Select:
+    """
+    Note: the query does not include `.order_by`.
+    """
     stm = (
         select(HistoryImageCache.zarr_url, HistoryUnit.status)
         .join(HistoryUnit)
@@ -39,13 +45,56 @@
     return stm


-async def enrich_images_async(
+def _postprocess_image_lists(
+    target_images: list[dict[str, Any]],
+    list_query_url_status: list[tuple[str, str]],
+) -> list[dict[str, Any]]:
+    """ """
+    t_1 = time.perf_counter()
+
+    # Select only processed images that are part of the target image set
+    zarr_url_to_image = {img["zarr_url"]: img for img in target_images}
+    target_zarr_urls = zarr_url_to_image.keys()
+    list_processed_url_status = [
+        url_status
+        for url_status in list_query_url_status
+        if url_status[0] in target_zarr_urls
+    ]
+
+    set_processed_urls = set(
+        url_status[0] for url_status in list_processed_url_status
+    )
+    processed_images_with_status = [
+        _enriched_image(
+            img=zarr_url_to_image[item[0]],
+            status=item[1],
+        )
+        for item in list_processed_url_status
+    ]
+
+    non_processed_urls = target_zarr_urls - set_processed_urls
+    non_processed_images_with_status = [
+        _enriched_image(
+            img=zarr_url_to_image[zarr_url],
+            status=HistoryUnitStatusWithUnset.UNSET,
+        )
+        for zarr_url in non_processed_urls
+    ]
+    t_2 = time.perf_counter()
+    logger.debug(
+        f"[enrich_images_async] post-processing, elapsed={t_2 - t_1:.5f} s"
+    )
+
+    return processed_images_with_status + non_processed_images_with_status
+
+
+async def enrich_images_unsorted_async(
     *,
     images: list[dict[str, Any]],
     dataset_id: int,
     workflowtask_id: int,
     db: AsyncSession,
-) -> list[dict[str, ImageAttributeValue]]:
+) -> list[dict[str, Any]]:
     """
     Enrich images with a status-related attribute.

@@ -56,124 +105,75 @@ async def enrich_images_async(
         db: An async db session

     Returns:
-        The list of enriched images
+        The list of enriched images, not necessarily in the same order as
+        the input.
     """
     t_0 = time.perf_counter()
     logger.info(
         f"[enrich_images_async] START, {dataset_id=}, {workflowtask_id=}"
     )

-    zarr_url_to_image = {img["zarr_url"]: img for img in images}
-
+    # Get `(zarr_url, status)` for _all_ processed images (including those that
+    # are not part of the target image set)
     res = await db.execute(
         _prepare_query(
             dataset_id=dataset_id,
             workflowtask_id=workflowtask_id,
         )
     )
-    zarr_urls = zarr_url_to_image.keys()
-    list_processed_url_status = [
-        item for item in res.all() if item[0] in zarr_urls
-    ]
+    list_query_url_status = res.all()
     t_1 = time.perf_counter()
-    logger.debug(f"[enrich_images_async] db-query, elapsed={t_1 - t_0:.4f} s")
-
-    set_processed_urls = set(item[0] for item in list_processed_url_status)
-    processed_images_with_status = [
-        _enriched_image(
-            img=zarr_url_to_image[item[0]],
-            status=item[1],
-        )
-        for item in list_processed_url_status
-    ]
-    t_2 = time.perf_counter()
-    logger.debug(
-        "[enrich_images_async] processed-images, " f"elapsed={t_2 - t_1:.4f} s"
-    )
+    logger.debug(f"[enrich_images_async] query, elapsed={t_1 - t_0:.5f} s")

-    non_processed_urls = zarr_url_to_image.keys() - set_processed_urls
-    non_processed_images_with_status = [
-        _enriched_image(
-            img=zarr_url_to_image[zarr_url],
-            status=HistoryUnitStatusWithUnset.UNSET,
-        )
-        for zarr_url in non_processed_urls
-    ]
-    t_3 = time.perf_counter()
-    logger.debug(
-        "[enrich_images_async] non-processed-images, "
-        f"elapsed={t_3 - t_2:.4f} s"
+    output = _postprocess_image_lists(
+        target_images=images,
+        list_query_url_status=list_query_url_status,
     )

-    return processed_images_with_status + non_processed_images_with_status
+    return output


-def enrich_images_sync(
+def enrich_images_unsorted_sync(
     *,
     images: list[dict[str, Any]],
     dataset_id: int,
     workflowtask_id: int,
-) -> list[dict[str, ImageAttributeValue]]:
+) -> list[dict[str, Any]]:
     """
     Enrich images with a status-related attribute.

+
     Args:
         images: The input image list
         dataset_id: The dataset ID
         workflowtask_id: The workflow-task ID

     Returns:
-        The list of enriched images
+        The list of enriched images, not necessarily in the same order as
+        the input.
     """
+
     t_0 = time.perf_counter()
     logger.info(
         f"[enrich_images_async] START, {dataset_id=}, {workflowtask_id=}"
     )

-    zarr_url_to_image = {img["zarr_url"]: img for img in images}
-
-    t_1 = time.perf_counter()
-    logger.debug(f"[enrich_images_async] deep-copy, elapsed={t_1 - t_0:.4f} s")
-
+    # Get `(zarr_url, status)` for _all_ processed images (including those that
+    # are not part of the target image set)
     with next(get_sync_db()) as db:
         res = db.execute(
             _prepare_query(
                 dataset_id=dataset_id,
                 workflowtask_id=workflowtask_id,
             )
         )
-        zarr_urls = zarr_url_to_image.keys()
-        list_processed_url_status = [
-            item for item in res.all() if item[0] in zarr_urls
-        ]
-        t_2 = time.perf_counter()
-        logger.debug(f"[enrich_images_async] db-query, elapsed={t_2 - t_1:.4f} s")
-
-        set_processed_urls = set(item[0] for item in list_processed_url_status)
-        processed_images_with_status = [
-            _enriched_image(
-                img=zarr_url_to_image[item[0]],
-                status=item[1],
-            )
-            for item in list_processed_url_status
-        ]
-        t_3 = time.perf_counter()
-        logger.debug(
-            "[enrich_images_async] processed-images, " f"elapsed={t_3 - t_2:.4f} s"
-        )
+        list_query_url_status = res.all()
+        t_1 = time.perf_counter()
+        logger.debug(f"[enrich_images_async] query, elapsed={t_1 - t_0:.5f} s")

-        non_processed_urls = zarr_url_to_image.keys() - set_processed_urls
-        non_processed_images_with_status = [
-            _enriched_image(
-                img=zarr_url_to_image[zarr_url],
-                status=HistoryUnitStatusWithUnset.UNSET,
-            )
-            for zarr_url in non_processed_urls
-        ]
-        t_4 = time.perf_counter()
-        logger.debug(
-            "[enrich_images_async] non-processed-images, "
-            f"elapsed={t_4 - t_3:.4f} s"
+        output = _postprocess_image_lists(
+            target_images=images,
+            list_query_url_status=list_query_url_status,
         )

-        return processed_images_with_status + non_processed_images_with_status
+        return output
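
For context, a minimal standalone sketch of the post-processing step that the new `_postprocess_image_lists` helper performs: keep only the `(zarr_url, status)` rows that match the target images, copy each matched status into the image's attributes under `IMAGE_STATUS_KEY`, and mark the remaining images as unset. The function name `enrich_with_status`, the `UNSET_STATUS` literal, and the sample data below are illustrative assumptions, not part of the commit.

from typing import Any

IMAGE_STATUS_KEY = "__wftask_dataset_image_status__"
UNSET_STATUS = "unset"  # stand-in for HistoryUnitStatusWithUnset.UNSET (actual value not shown here)


def enrich_with_status(
    target_images: list[dict[str, Any]],
    url_status_pairs: list[tuple[str, str]],
) -> list[dict[str, Any]]:
    # Index target images by zarr_url, then keep only rows whose URL is in the target set
    zarr_url_to_image = {img["zarr_url"]: img for img in target_images}
    processed = [
        (url, status)
        for url, status in url_status_pairs
        if url in zarr_url_to_image
    ]
    processed_urls = {url for url, _ in processed}

    def _with_status(img: dict[str, Any], status: str) -> dict[str, Any]:
        # Copy the image, adding/overwriting the status attribute
        return img | {
            "attributes": img["attributes"] | {IMAGE_STATUS_KEY: status}
        }

    enriched = [
        _with_status(zarr_url_to_image[url], status) for url, status in processed
    ]
    # Target images with no matching history row get the "unset" status
    enriched += [
        _with_status(img, UNSET_STATUS)
        for url, img in zarr_url_to_image.items()
        if url not in processed_urls
    ]
    return enriched


if __name__ == "__main__":
    images = [
        {"zarr_url": "/data/plate.zarr/A/01/0", "attributes": {"well": "A01"}},
        {"zarr_url": "/data/plate.zarr/B/02/0", "attributes": {"well": "B02"}},
    ]
    # Rows as they would come back from the (zarr_url, status) query
    rows = [("/data/plate.zarr/A/01/0", "done")]
    for img in enrich_with_status(images, rows):
        print(img["zarr_url"], img["attributes"][IMAGE_STATUS_KEY])

As in the refactored helper, the output order is processed images first, then unset ones, so it is not necessarily the input order.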

0 commit comments
