Skip to content

Commit f3acdd2

Browse files
authored
Improve caching strategy for inventory cache (#49693)
* Improve caching strategy for inventory cache. The inventory cache is used to reduce potential failures caused by temporarily unavailable external intersphinx inventories. It covers both the airflow package inventories and the external intersphinx docs. We are more concerned about the availability of third-party inventories than of our own, so we should use the cache mostly for the external packages rather than for ours, and we should refresh it every time any dependency changes. The caching strategy originally implemented keeps inventories for 12 hours, but this might cause 12 hours of failures in CI, since we use the latest cached inventories for stability. This change adds the --refresh-airflow-inventories flag and uses it in CI to always refresh the airflow inventories while leaving the other cached inventories untouched. * fixup! Improve caching strategy for inventory cache
1 parent 1df5b41 commit f3acdd2

File tree

7 files changed

+147
-51
lines changed

7 files changed

+147
-51
lines changed

.github/workflows/ci-image-checks.yml

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -281,19 +281,18 @@ jobs:
281281
uses: apache/infrastructure-actions/stash/restore@1c35b5ccf8fba5d4c3fdf25a045ca91aa0cbc468
282282
with:
283283
path: ./generated/_inventory_cache/
284-
# TODO(potiuk): do better with determining the key
285-
key: cache-docs-inventory-v1-${{ hashFiles('pyproject.toml') }}
284+
key: cache-docs-inventory-v1-${{ hashFiles('**/pyproject.toml') }}
286285
id: restore-docs-inventory-cache
287286
- name: "Building docs with ${{ matrix.flag }} flag"
288287
env:
289288
DOCS_LIST_AS_STRING: ${{ inputs.docs-list-as-string }}
290289
run: >
291-
breeze build-docs ${DOCS_LIST_AS_STRING} ${{ matrix.flag }}
290+
breeze build-docs ${DOCS_LIST_AS_STRING} ${{ matrix.flag }} --refresh-airflow-inventories
292291
- name: "Save docs inventory cache"
293292
uses: apache/infrastructure-actions/stash/save@1c35b5ccf8fba5d4c3fdf25a045ca91aa0cbc468
294293
with:
295294
path: ./generated/_inventory_cache/
296-
key: cache-docs-inventory-v1-${{ hashFiles('pyproject.toml') }}
295+
key: cache-docs-inventory-v1-${{ hashFiles('**/pyproject.toml') }}
297296
if-no-files-found: 'error'
298297
retention-days: '2'
299298
# If we upload from multiple matrix jobs we could end up with a race condition. so just pick one job

dev/breeze/doc/images/output_build-docs.svg

Lines changed: 67 additions & 31 deletions
Loading
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
8ae2b68e8981f5fc57c32b7e064211bd
1+
0da87f68c7956d7611372a00164b6c5c

dev/breeze/src/airflow_breeze/commands/developer_commands.py

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -667,9 +667,15 @@ def start_airflow(
667667
@option_builder
668668
@click.option(
669669
"--clean-build",
670-
help="Clean inventories of Inter-Sphinx documentation and generated APIs and sphinx artifacts "
671-
"before the build - useful for a clean build.",
672670
is_flag=True,
671+
help="Cleans the build directory before building the documentation and removes all inventory "
672+
"cache (including external inventories).",
673+
)
674+
@click.option(
675+
"--refresh-airflow-inventories",
676+
is_flag=True,
677+
help="When set, only airflow package inventories will be refreshed, regardless "
678+
"if they are already downloaded. With `--clean-build` - everything is cleaned..",
673679
)
674680
@click.option("-d", "--docs-only", help="Only build documentation.", is_flag=True)
675681
@option_dry_run
@@ -704,6 +710,7 @@ def start_airflow(
704710
def build_docs(
705711
builder: str,
706712
clean_build: bool,
713+
refresh_airflow_inventories: bool,
707714
docs_only: bool,
708715
github_repository: str,
709716
include_not_ready_providers: bool,
@@ -730,9 +737,17 @@ def build_docs(
730737
directories_to_clean = ["apis"]
731738
generated_path = AIRFLOW_ROOT_PATH / "generated"
732739
for dir_name in directories_to_clean:
740+
get_console().print("Removing all generated dirs.")
733741
for directory in generated_path.rglob(dir_name):
734742
get_console().print(f"[info]Removing {directory}")
735743
shutil.rmtree(directory, ignore_errors=True)
744+
if refresh_airflow_inventories and not clean_build:
745+
get_console().print("Removing airflow inventories.")
746+
package_globs = ["helm-chart", "docker-stack", "apache-airflow*"]
747+
for package_glob in package_globs:
748+
for directory in (generated_path / "_inventory_cache").rglob(package_glob):
749+
get_console().print(f"[info]Removing {directory}")
750+
shutil.rmtree(directory, ignore_errors=True)
736751

737752
docs_list_as_tuple: tuple[str, ...] = ()
738753
if distributions_list and len(distributions_list):

dev/breeze/src/airflow_breeze/commands/developer_commands_config.py

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -304,15 +304,28 @@
304304
],
305305
"breeze build-docs": [
306306
{
307-
"name": "Doc flags",
307+
"name": "Build scope (default is to build docs and spellcheck)",
308+
"options": ["--docs-only", "--spellcheck-only"],
309+
},
310+
{
311+
"name": "Type of build",
312+
"options": ["--one-pass-only"],
313+
},
314+
{
315+
"name": "Cleaning inventories",
316+
"options": ["--clean-build", "--refresh-airflow-inventories"],
317+
},
318+
{
319+
"name": "Filtering options",
308320
"options": [
309-
"--docs-only",
310-
"--spellcheck-only",
311-
"--clean-build",
312-
"--one-pass-only",
313321
"--package-filter",
314322
"--include-not-ready-providers",
315323
"--include-removed-providers",
324+
],
325+
},
326+
{
327+
"name": "Misc options",
328+
"options": [
316329
"--github-repository",
317330
"--builder",
318331
"--distributions-list",

devel-common/src/docs/build_docs.py

Lines changed: 29 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -132,12 +132,19 @@ def _promote_new_flags():
132132
console.print("You can also use other extra flags to iterate faster:")
133133
console.print(" [bright_blue]--docs-only - Only build documentation[/]")
134134
console.print(" [bright_blue]--spellcheck-only - Only perform spellchecking[/]")
135-
console.print(" [bright_blue]--clean-build - Refresh inventories for inter-sphinx references[/]")
136135
console.print()
137136
console.print("You can list all packages you can build:")
138137
console.print()
139138
console.print(" [bright_blue]--list-packages - Shows the list of packages you can build[/]")
140139
console.print()
140+
console.print("You can run clean build - refreshing inter-sphinx inventories or refresh airflow ones.\n")
141+
console.print(
142+
" [bright_blue]--clean-build - Refresh inventories and build files for all inter-sphinx references (including external ones)[/]"
143+
)
144+
console.print(
145+
" [bright_blue]--refresh-airflow-inventories - Force refresh only airflow inventories (without cleaning build files or external inventories).[/]"
146+
)
147+
console.print()
141148
console.print("For more info:")
142149
console.print(" [bright_blue]uv run build-docs --help[/]")
143150
console.print()
@@ -457,7 +464,11 @@ def is_command_available(command):
457464
},
458465
{
459466
"name": "Type of build",
460-
"options": ["--autobuild", "--one-pass-only", "--clean-build"],
467+
"options": ["--autobuild", "--one-pass-only"],
468+
},
469+
{
470+
"name": "Cleaning inventories",
471+
"options": ["--clean-build", "--refresh-airflow-inventories"],
461472
},
462473
{
463474
"name": "Filtering options",
@@ -497,9 +508,6 @@ def is_command_available(command):
497508
"only that package is selected to build. "
498509
"If the command is run in the root of the Airflow repo, all packages are selected to be built.",
499510
)
500-
@click.option(
501-
"--clean-build", is_flag=True, help="Cleans the build directory before building the documentation."
502-
)
503511
@click.option("--docs-only", is_flag=True, help="Only build documentation")
504512
@click.option("--spellcheck-only", is_flag=True, help="Only perform spellchecking")
505513
@click.option(
@@ -516,6 +524,18 @@ def is_command_available(command):
516524
is_flag=True,
517525
help="Lists all available packages. You can use it to check the names of the packages you want to build.",
518526
)
527+
@click.option(
528+
"--clean-build",
529+
is_flag=True,
530+
help="Cleans the build directory before building the documentation and removes all inventory "
531+
"cache (including external inventories).",
532+
)
533+
@click.option(
534+
"--refresh-airflow-inventories",
535+
is_flag=True,
536+
help="When set, only airflow package inventories will be refreshed, regardless "
537+
"if they are already downloaded. With `--clean-build` - everything is cleaned..",
538+
)
519539
@click.option(
520540
"-v",
521541
"--verbose",
@@ -537,6 +557,7 @@ def build_docs(
537557
spellcheck_only,
538558
jobs,
539559
list_packages,
560+
refresh_airflow_inventories,
540561
verbose,
541562
packages,
542563
):
@@ -579,7 +600,9 @@ def build_docs(
579600
with with_group("Fetching inventories"):
580601
# Inventories that could not be retrieved should be built first. This may mean this is a
581602
# new package.
582-
packages_without_inventories = fetch_inventories(clean_build=clean_build)
603+
packages_without_inventories = fetch_inventories(
604+
clean_build=clean_build, refresh_airflow_inventories=refresh_airflow_inventories
605+
)
583606
normal_packages, priority_packages = partition(
584607
lambda d: d in packages_without_inventories, packages_to_build
585608
)

devel-common/src/sphinx_exts/docs_build/fetch_inventories.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,13 @@ def _is_outdated(path: str):
9393
return delta > datetime.timedelta(hours=12)
9494

9595

96-
def fetch_inventories(clean_build: bool) -> list[str]:
96+
def should_be_refreshed(pkg_name: str, refresh_airflow_inventories: bool) -> bool:
97+
if pkg_name in ["helm-chart", "docker-stack"] or pkg_name.startswith("apache-airflow"):
98+
return refresh_airflow_inventories
99+
return False
100+
101+
102+
def fetch_inventories(clean_build: bool, refresh_airflow_inventories: bool = False) -> list[str]:
97103
"""Fetch all inventories for Airflow documentation packages and store in cache."""
98104
if clean_build:
99105
shutil.rmtree(CACHE_PATH)
@@ -136,7 +142,11 @@ def fetch_inventories(clean_build: bool) -> list[str]:
136142
for pkg_name, doc_url in THIRD_PARTY_INDEXES.items()
137143
)
138144

139-
to_download = [(pkg_name, url, path) for pkg_name, url, path in to_download if _is_outdated(path)]
145+
to_download = [
146+
(pkg_name, url, path)
147+
for pkg_name, url, path in to_download
148+
if _is_outdated(path) or should_be_refreshed(pkg_name, refresh_airflow_inventories)
149+
]
140150
if not to_download:
141151
print("Nothing to do")
142152
return []

0 commit comments

Comments
 (0)