diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index ecd8fa7..26722ce 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -6,10 +6,12 @@ on: - main paths: - 'action.yml' + - 'handle_caches.jl' - '.github/**' pull_request: paths: - 'action.yml' + - 'handle_caches.jl' - '.github/**' jobs: @@ -48,6 +50,26 @@ jobs: shell: 'julia --color=yes {0}' run: 'using Pkg; Pkg.add("pandoc_jll")' + # Do tests with no matrix also given the matrix is auto-included in cache key + test-save-nomatrix: + needs: generate-key + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 + - name: Save cache + id: cache + uses: ./ + with: + cache-name: ${{ needs.generate-key.outputs.cache-name }} + - name: Check no artifacts dir + shell: 'julia --color=yes {0}' + run: | + dir = joinpath(first(DEPOT_PATH), "artifacts") + @assert !isdir(dir) + - name: Install a small binary + shell: 'julia --color=yes {0}' + run: 'using Pkg; Pkg.add("pandoc_jll")' + test-restore: needs: [generate-key, test-save] strategy: @@ -72,13 +94,47 @@ jobs: - name: Check existance or emptiness of affected dirs shell: 'julia --color=yes {0}' run: | - # Artifacts and Packages should exist as they've been cached + # These dirs should exist as they've been cached artifacts_dir = joinpath(first(DEPOT_PATH), "artifacts") @assert !isempty(readdir(artifacts_dir)) packages_dir = joinpath(first(DEPOT_PATH), "packages") @assert !isempty(readdir(packages_dir)) - - # Caching the compiled dir is disabled by default and should not exist after restoring a cache compiled_dir = joinpath(first(DEPOT_PATH), "compiled") - @assert !isdir(compiled_dir) || isempty(readdir(compiled_dir)) + @assert !isempty(readdir(compiled_dir)) + scratchspaces_dir = joinpath(first(DEPOT_PATH), "scratchspaces") + @assert !isempty(readdir(scratchspaces_dir)) + logs_dir = joinpath(first(DEPOT_PATH), "logs") + @assert !isempty(readdir(logs_dir)) + + test-restore-nomatrix: + needs: 
[generate-key, test-save-nomatrix] + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 + - name: Restore cache + id: cache + uses: ./ + with: + cache-name: ${{ needs.generate-key.outputs.cache-name }} + - name: Test cache-hit output + shell: 'julia --color=yes {0}' + run: | + @show ENV["cache-hit"] + @assert ENV["cache-hit"] == "true" + env: + cache-hit: ${{ steps.cache.outputs.cache-hit }} + - name: Check existance or emptiness of affected dirs + shell: 'julia --color=yes {0}' + run: | + # These dirs should exist as they've been cached + artifacts_dir = joinpath(first(DEPOT_PATH), "artifacts") + @assert !isempty(readdir(artifacts_dir)) + packages_dir = joinpath(first(DEPOT_PATH), "packages") + @assert !isempty(readdir(packages_dir)) + compiled_dir = joinpath(first(DEPOT_PATH), "compiled") + @assert !isempty(readdir(compiled_dir)) + scratchspaces_dir = joinpath(first(DEPOT_PATH), "scratchspaces") + @assert !isempty(readdir(scratchspaces_dir)) + logs_dir = joinpath(first(DEPOT_PATH), "logs") + @assert !isempty(readdir(logs_dir)) diff --git a/README.md b/README.md index ff0778d..db35f01 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # julia-actions/cache Action -A shortcut action to cache Julia artifacts, packages and (optionally) registries to reduce GitHub Actions running time. +A shortcut action to cache Julia depot contents to reduce GitHub Actions running time. ## Usage @@ -22,8 +22,7 @@ jobs: - uses: julia-actions/julia-runtest@v1 ``` -By default, this caches the files in `~/.julia/artifacts/` and `~/.julia/packages/`. -To also cache `~/.julia/registries/`, use +By default the majority of the depot is cached. To also cache `~/.julia/registries/`, use ```yaml - uses: julia-actions/cache@v1 @@ -31,28 +30,62 @@ To also cache `~/.julia/registries/`, use cache-registries: "true" ``` -Note that caching the registries may actually slow down the workflow running time on Windows runners. 
-That is why caching the registries is disabled by default. +However, note that caching the registries may mean that the registry will not be updated each run. -### Inputs +### Optional Inputs -- `cache-name` - Name used as part of the cache keys -- `cache-artifacts` - Whether to cache `~/.julia/artifacts/`. Enabled by default. -- `cache-packages` - Whether to cache `~/.julia/packages/`. Enabled by default. -- `cache-registries` - Whether to cache `~/.julia/registries/`. Disabled by default. -- `cache-compiled` - Whether to cache `~/.julia/compiled/`. Disabled by default. **USE ONLY IF YOU KNOW WHAT YOU'RE DOING!** See [#11](https://github.com/julia-actions/cache/issues/11). -- `cache-scratchspaces` - Whether to cache `~/.julia/scratchspaces/`. Enabled by default. +- `cache-name` - The cache key prefix. Defaults to `julia-cache`. The key body automatically includes matrix vars and the OS. Include any other parameters/details in this prefix to ensure one unique cache key per concurrent job type. +- `include-matrix` - Whether to include the matrix values when constructing the cache key. Defaults to `true`. +- `cache-artifacts` - Whether to cache `~/.julia/artifacts/`. Defaults to `true`. +- `cache-packages` - Whether to cache `~/.julia/packages/`. Defaults to `true`. +- `cache-registries` - Whether to cache `~/.julia/registries/`. Defaults to `false`. Disabled to ensure CI gets latest versions. +- `cache-compiled` - Whether to cache `~/.julia/compiled/`. Defaults to `true`. +- `cache-scratchspaces` - Whether to cache `~/.julia/scratchspaces/`. Defaults to `true`. +- `cache-logs` - Whether to cache `~/.julia/logs/`. Defaults to `true`. Helps auto-`Pkg.gc()` keep the cache small. +- `delete-old-caches` - Whether to delete old caches for the given key. Defaults to `true`. ### Outputs - `cache-hit` - A boolean value to indicate an exact match was found for the primary key. Returns \"\" when the key is new. Forwarded from actions/cache.
-## How it works +## How It Works This action is a wrapper around <https://github.com/actions/cache>. In summary, this action stores the files in the aforementioned paths in one compressed file when running for the first time. -This cached file is then restored upon the second run. -The benefit of this is that downloading one big file is quicker than downloading many different files from many different locations. +This cached file is then restored upon the second run, and afterwards resaved under a new key, and the previous cache is deleted. +The benefit of caching is that downloading one big file is quicker than downloading many different files from many different locations +and precompiling them. + +### Cache keys + +The cache key that the cache will be saved as is based on: +- The `cache-name` input +- All variables in the `matrix` (unless disabled via `include-matrix: 'false'`) +- The `runner.os` (may be in the matrix too, but included for safety) +- The run id +- The run attempt number + +> [!NOTE] +> If your workflow does not use a matrix for concurrency, you should choose a `cache-name` that is unique for +> concurrent jobs, otherwise caching may not be effective. + +### Cache Retention + +This action automatically deletes old caches that match the first 3 fields of the above key: +- The `cache-name` input +- All variables in the `matrix` (unless disabled via `include-matrix: 'false'`) +- The `runner.os` (may be in the matrix too, but included for safety) + +This means your cache files will not grow needlessly. GitHub also removes cache entries that have +[not been accessed in over 7 days](https://docs.github.com/en/actions/using-workflows/caching-dependencies-to-speed-up-workflows#usage-limits-and-eviction-policy), and evicts caches once a repository exceeds its total cache size limit. + +To disable deletion set input `delete-old-caches: 'false'`. + +### Cache Garbage Collection + +Caches are restored and re-saved after every run, retaining the state of the depot throughout runs.
+Their size is regulated, like that of a local depot, by the automatic `Pkg.gc()` functionality that +clears out old content, which is possible because the contents of `~/.julia/logs/` are cached. ## Third Party Notice diff --git a/action.yml b/action.yml index b966dad..42fbe03 100644 --- a/action.yml +++ b/action.yml @@ -8,8 +8,11 @@ branding: inputs: cache-name: - description: 'Name used as part of the cache keys' + description: 'The cache key prefix. Unless disabled the key body automatically includes matrix vars, and the OS. Include any other parameters/details in this prefix to ensure one unique cache key per concurrent job type.' default: 'julia-cache' + include-matrix: + description: 'Whether to include the matrix values when constructing the cache key' + default: 'true' cache-artifacts: description: 'Whether to cache ~/.julia/artifacts/' default: 'true' @@ -17,14 +20,20 @@ inputs: description: 'Whether to cache ~/.julia/packages/' default: 'true' cache-registries: - description: 'Whether to cache ~/.julia/registries/' + description: 'Whether to cache ~/.julia/registries/. This is off by default to ensure CI gets latest versions' default: 'false' cache-compiled: - description: 'Whether to cache ~/.julia/compiled. USE WITH CAUTION! See https://github.com/julia-actions/cache/issues/11 for caveats.' - default: 'false' + description: 'Whether to cache ~/.julia/compiled/' + default: 'true' cache-scratchspaces: description: 'Whether to cache ~/.julia/scratchspaces/' default: 'true' + cache-logs: + description: 'Whether to cache ~/.julia/logs/.
This helps automatic Pkg.gc() keep the cache size down' + default: 'true' + delete-old-caches: + description: 'Whether to delete old caches for the given key' + default: 'true' outputs: cache-hit: @@ -43,18 +52,52 @@ runs: [ "${{ inputs.cache-registries }}" = "true" ] && R_PATH="~/.julia/registries" echo "registries-path=$R_PATH" >> $GITHUB_OUTPUT [ "${{ inputs.cache-compiled }}" = "true" ] && PCC_PATH="~/.julia/compiled" - echo "precompilation-cache-path=$PCC_PATH" >> $GITHUB_OUTPUT + echo "compiled-path=$PCC_PATH" >> $GITHUB_OUTPUT [ "${{ inputs.cache-scratchspaces }}" = "true" ] && S_PATH="~/.julia/scratchspaces" echo "scratchspaces-path=$S_PATH" >> $GITHUB_OUTPUT + [ "${{ inputs.cache-logs }}" = "true" ] && L_PATH="~/.julia/logs" + echo "logs-path=$L_PATH" >> $GITHUB_OUTPUT shell: bash - - uses: actions/cache@704facf57e6136b1bc63b828d79edcd491f0ee84 + # MATRIX_STRING is a join of all matrix variables that helps concurrent runs have a unique cache key. + # The underscore at the end of the restore key marks the end of the restore section. Without this + # a runner without a matrix has a restore key that will cause improper clearing of caches from those + # with a matrix.
+    - id: keys
+      run: |
+        MATRIX_STRING="${{ inputs.include-matrix == 'true' && join(matrix.*, '-') || '' }}" # stays empty unless include-matrix is 'true'
+        [ -n "$MATRIX_STRING" ] && MATRIX_STRING="-${MATRIX_STRING}"
+        RESTORE_KEY="${{ inputs.cache-name }}-${{ runner.os }}${MATRIX_STRING}_"
+        echo "restore-key=${RESTORE_KEY}" >> $GITHUB_OUTPUT
+        echo "key=${RESTORE_KEY}${{ github.run_id }}-${{ github.run_attempt }}" >> $GITHUB_OUTPUT
+      shell: bash
+
+    - uses: actions/cache@4d4ae6ae148a43d0fd1eda1800170683e9882738
       id: cache
       with:
-        path: "${{ format('{0}\n{1}\n{2}\n{3}\n{4}', steps.paths.outputs.artifacts-path, steps.paths.outputs.packages-path, steps.paths.outputs.registries-path, steps.paths.outputs.precompilation-cache-path, steps.paths.outputs.scratchspaces-path) }}"
-        key: ${{ runner.os }}-${{ inputs.cache-name }}-${{ hashFiles('**/Project.toml') }}
-        restore-keys: |
-          ${{ runner.os }}-${{ inputs.cache-name }}-
+        path: |
+          ${{ steps.paths.outputs.artifacts-path }}
+          ${{ steps.paths.outputs.packages-path }}
+          ${{ steps.paths.outputs.registries-path }}
+          ${{ steps.paths.outputs.scratchspaces-path }}
+          ${{ steps.paths.outputs.compiled-path }}
+          ${{ steps.paths.outputs.logs-path }}
+
+        key: ${{ steps.keys.outputs.key }}
+        restore-keys: ${{ steps.keys.outputs.restore-key }}
+        enableCrossOsArchive: false
+
+    # GitHub and actions/cache don't provide a way to update a cache at a given key, so we delete any
+    # that match the restore key just before saving the new cache
+    - uses: pyTooling/Actions/with-post-step@adef08d3bdef092282614f3b683897cefae82ee3
+      if: ${{ inputs.delete-old-caches == 'true' }}
+      with:
+        # seems like there has to be a `main` step in this action.
Could list caches for info if we wanted
+        # main: julia ${{ github.action_path }}/handle_caches.jl "${{ github.repository }}" "list"
+        main: du -shc ~/.julia/* || true
+        post: julia ${{ github.action_path }}/handle_caches.jl "${{ github.repository }}" "rm" "${{ steps.keys.outputs.restore-key }}"
+      env:
+        GH_TOKEN: ${{ github.token }}
 
     - id: hit
       run: echo "cache-hit=$CACHE_HIT" >> $GITHUB_OUTPUT
diff --git a/handle_caches.jl b/handle_caches.jl
new file mode 100644
index 0000000..9066712
--- /dev/null
+++ b/handle_caches.jl
@@ -0,0 +1,64 @@
+using Pkg, Dates
+
+# List or delete the GitHub Actions caches of a repository through the `gh` CLI.
+#
+# Arguments are read from `ARGS`:
+#   1. repo        - repository passed to `gh --repo`
+#   2. func        - "list" prints the existing caches, "rm" deletes matching caches
+#   3. restore_key - key prefix selecting the caches to delete (required for "rm")
+#
+# Throws an `ArgumentError` for an unknown `func` or for "rm" without a restore key.
+function handle_caches()
+    repo = ARGS[1]
+    func = ARGS[2]
+    restore_key = get(ARGS, 3, "")
+
+    if func == "list"
+        println("Listing existing caches")
+        run(`gh cache list --limit 100 --repo $repo`)
+    elseif func == "rm"
+        # An empty prefix matches every key and would wipe all caches of the repository.
+        isempty(restore_key) && throw(ArgumentError("A non-empty restore key is required for `rm`"))
+        caches = String[]
+        for _ in 1:5 # limit to avoid accidental rate limiting
+            # One cache per line: split on newlines so that the row count below is meaningful.
+            listing = read(`gh cache list --limit 100 --repo $repo`, String)
+            hits = split(strip(listing), '\n'; keepempty=false)
+            search_again = length(hits) == 100
+            # Restore keys are matched as prefixes, so require a column that starts with the key
+            # instead of a substring match anywhere in the output.
+            filter!(row -> any(startswith(restore_key), split(row)), hits)
+            isempty(hits) && break
+            # We can delete everything that matches the restore key because the new cache is saved later.
+            deleted = String[]
+            for c in hits
+                try
+                    # The first column of a row identifies the cache to delete.
+                    run(`gh cache delete $(split(c)[1]) --repo $repo`)
+                    push!(deleted, c)
+                catch e
+                    @error e
+                end
+            end
+            append!(caches, deleted)
+            # Listing again after deleting nothing would only return the same rows.
+            (search_again && !isempty(deleted)) || break
+        end
+        if isempty(caches)
+            println("No existing caches found for restore key `$restore_key`")
+        else
+            println("$(length(caches)) existing caches deleted that match restore key `$restore_key`:")
+            println.(caches)
+        end
+    else
+        throw(ArgumentError("Unexpected second argument: $func"))
+    end
+end
+
+try
+    # do a gc with the standard 7-day delay
+    Pkg.gc()
+    handle_caches()
+catch e
+    @error "An error occurred while managing existing caches" e
+end