From 580d2b69d895343992af2cbad49c32a0149c2cde Mon Sep 17 00:00:00 2001 From: Curtis Vogt Date: Wed, 26 Jun 2024 16:54:10 -0500 Subject: [PATCH] Fix issues with custom cache eviction (#135) * Keep latest and default branch cache entries * Record skipped cache IDs --- action.yml | 25 ++++++++++++++++++++---- handle_caches.jl | 49 +++++++++++++++++++++++++++++------------------- 2 files changed, 51 insertions(+), 23 deletions(-) diff --git a/action.yml b/action.yml index ad88e4f..4a0852f 100644 --- a/action.yml +++ b/action.yml @@ -132,12 +132,27 @@ runs: du -shc ${{ steps.paths.outputs.depot }}/* || true shell: bash - # github and actions/cache doesn't provide a way to update a cache at a given key, so we delete any - # that match the restore key just before saving the new cache + # GitHub actions cache entries are immutable and cannot be updated. In order to have both the Julia + # depot cache be up-to-date and avoid storing redundant cache entries we'll manually cleanup old + # cache entries before the new cache is saved. However, we need to be careful with our manual + # cleanup as otherwise we can cause cache misses for jobs which would have normally had a cache hit. + # Some scenarios to keep in mind include: + # + # - Job failures result in the post-action for `actions/cache` being skipped. If we delete all cache + # entries for the branch we may have no cache entry available for the next run. + # - We should avoid deleting old cache entries for the default branch since these entries serve as + # the fallback if no earlier cache entry exists on a branch. We can rely on GitHub's default cache + # eviction policy here which will remove the oldest cache entry first. + # + # References: + # - https://github.com/actions/cache/blob/main/tips-and-workarounds.md#update-a-cache + # - https://docs.github.com/en/actions/using-workflows/caching-dependencies-to-speed-up-workflows#usage-limits-and-eviction-policy # Not windows - uses: pyTooling/Actions/with-post-step@e9d0dc3dba9fda45f195946858708f60c0240caf # v1.0.5 - if: ${{ inputs.delete-old-caches != 'false' && runner.OS != 'Windows' }} + if: ${{ inputs.delete-old-caches != 'false' && + github.ref != format('refs/heads/{0}', github.event.repository.default_branch) && + runner.OS != 'Windows' }} with: # seems like there has to be a `main` step in this action. Could list caches for info if we wanted # main: julia ${{ github.action_path }}/handle_caches.jl "${{ github.repository }}" "list" @@ -148,7 +163,9 @@ runs: # Windows (because this action uses command prompt on windows) - uses: pyTooling/Actions/with-post-step@e9d0dc3dba9fda45f195946858708f60c0240caf # v1.0.5 - if: ${{ inputs.delete-old-caches != 'false' && runner.OS == 'Windows' }} + if: ${{ inputs.delete-old-caches != 'false' && + github.ref != format('refs/heads/{0}', github.event.repository.default_branch) && + runner.OS == 'Windows' }} with: main: echo "" post: cd %GITHUB_ACTION_PATH% && julia handle_caches.jl rm "${{ github.repository }}" "${{ steps.keys.outputs.restore-key }}" "${{ github.ref }}" "${{ inputs.delete-old-caches != 'required' }}" diff --git a/handle_caches.jl b/handle_caches.jl index 39e3ff2..5666299 100644 --- a/handle_caches.jl +++ b/handle_caches.jl @@ -10,37 +10,48 @@ function handle_caches() repo, restore_key, ref = ARGS[2:4] allow_failure = ARGS[5] == "true" - endpoint = "/repos/$repo/actions/caches" page = 1 per_page = 100 - escaped_restore_key = replace(restore_key, "\"" => "\\\"") - query = ".actions_caches[] | select(.key | startswith(\"$escaped_restore_key\")) | .id" - - deletions = String[] - failures = String[] + skipped = String[] + deleted = String[] + failed = String[] while 1 <= page <= 5 # limit to avoid accidental rate limiting - cmd = `gh api -X GET $endpoint -F ref=$ref -F per_page=$per_page -F page=$page --jq $query` + # https://docs.github.com/en/rest/actions/cache?apiVersion=2022-11-28#list-github-actions-caches-for-a-repository + # Note: The `key` field matches on the full key or a prefix. + cmd = ``` + gh api -X GET /repos/$repo/actions/caches + --field per_page=$per_page + --field page=$page + --field ref=$ref + --field key=$restore_key + --field sort=last_accessed_at + --field direction=desc + --jq '.actions_caches[].id' + ``` ids = split(read(cmd, String); keepempty=false) - page = length(ids) == per_page ? page + 1 : -1 - # We can delete all cache entries on this branch that matches the restore key - # because the new cache is saved later. + # Avoid deleting the latest used cache entry. This is particularly important for + # job failures where a new cache entry will not be saved after this. + page == 1 && !isempty(ids) && push!(skipped, popfirst!(ids)) + for id in ids try run(`gh cache delete $id --repo $repo`) - push!(deletions, id) + push!(deleted, id) catch e @error e - push!(failures, id) + push!(failed, id) end end + + page = length(ids) == per_page ? page + 1 : -1 end - if isempty(failures) && isempty(deletions) + if isempty(skipped) && isempty(deleted) && isempty(failed) println("No existing caches found on ref `$ref` matching restore key `$restore_key`") else - if !isempty(failures) - println("Failed to delete $(length(failures)) existing caches on ref `$ref` matching restore key `$restore_key`") - println.(failures) + if !isempty(failed) + println("Failed to delete $(length(failed)) existing caches on ref `$ref` matching restore key `$restore_key`") + println.(failed) @info """ To delete caches you need to grant the following to the default `GITHUB_TOKEN` by adding this to your workflow: @@ -55,9 +66,9 @@ function handle_caches() """ allow_failure || exit(1) end - if !isempty(deletions) - println("Deleted $(length(deletions)) caches on ref `$ref` matching restore key `$restore_key`") - println.(deletions) + if !isempty(deleted) + println("Deleted $(length(deleted)) caches on ref `$ref` matching restore key `$restore_key`") + println.(deleted) end end else