From b8f3ea56dacc0bd762cf9c6645334d5e0914ffb9 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Mon, 19 Aug 2024 13:56:50 +0100 Subject: [PATCH 1/3] rust: Avoid `rename-output.sh` output if there is no output This avoids producing: ``` +lint | no files found within ./target matching the provided output regexp +lint | find: '/tmp/earthly/lib/rust': No such file or directory ``` when there is no output, because `copy-output.sh` only creates that path `if [ -n \"\$1\" ]`. Apply the same condition to the `rename-output.sh` end of things. --- rust/Earthfile | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/rust/Earthfile b/rust/Earthfile index 6085119..c55665b 100644 --- a/rust/Earthfile +++ b/rust/Earthfile @@ -65,7 +65,7 @@ CARGO: cargo sweep -r -t $EARTHLY_SWEEP_DAYS; \ cargo sweep -r -i; \ $EARTHLY_FUNCTIONS_HOME/copy-output.sh "$output"; - RUN $EARTHLY_FUNCTIONS_HOME/rename-output.sh + RUN $EARTHLY_FUNCTIONS_HOME/rename-output.sh "$output" # SET_CACHE_MOUNTS_ENV sets the following entries in the environment, to be used to mount the cargo caches. # - EARTHLY_RUST_CARGO_HOME_CACHE: Code of the mount cache for the cargo home. @@ -177,13 +177,15 @@ INSTALL_EARTHLY_FUNCTIONS: chmod +x $EARTHLY_FUNCTIONS_HOME/copy-output.sh; \ # rename-output.sh moves files back from $OUTPUT_TMP_FOLDER to ./target # this function is expected to be called from a build context with ./target not belonging to a shared cache - echo "mkdir -p target; - if [ \"\$(find \"$OUTPUT_TMP_FOLDER\" -type f -printf . | wc -c)\" -eq 0 ]; then - echo \"no files found within ./target matching the provided output regexp\"; - else - cp -ruT \"$OUTPUT_TMP_FOLDER\" target; - rm -rf \"$OUTPUT_TMP_FOLDER\"; - fi;" > $EARTHLY_FUNCTIONS_HOME/rename-output.sh; \ + echo "if [ -n \"\$1\" ]; then + mkdir -p target; + if [ \"\$(find \"$OUTPUT_TMP_FOLDER\" -type f -printf . 
| wc -c)\" -eq 0 ]; then + echo \"no files found within ./target matching the provided output regexp\"; + else + cp -ruT \"$OUTPUT_TMP_FOLDER\" target; + rm -rf \"$OUTPUT_TMP_FOLDER\"; + fi; + fi;" > $EARTHLY_FUNCTIONS_HOME/rename-output.sh; \ chmod +x $EARTHLY_FUNCTIONS_HOME/rename-output.sh; \ fi; From a9b9b6ed64c29ae9ece9de99ece3741f2cdfde92 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Mon, 19 Aug 2024 13:56:52 +0100 Subject: [PATCH 2/3] rust: Echo `cargo $args` into the logs The logs otherwise just contain: ``` --> RUN set -e; cargo $args; cargo sweep -r -t $EARTHLY_SWEEP_DAYS; cargo sweep -r -i; $EARTHLY_FUNCTIONS_HOME/copy-output.sh "$output"; ``` which doesn't show what is actually being run. --- rust/Earthfile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/rust/Earthfile b/rust/Earthfile index c55665b..2766c3b 100644 --- a/rust/Earthfile +++ b/rust/Earthfile @@ -61,9 +61,12 @@ CARGO: END RUN --mount=$EARTHLY_RUST_CARGO_HOME_CACHE --mount=$EARTHLY_RUST_TARGET_CACHE \ set -e; \ + echo "+CARGO: cargo $args"; \ cargo $args; \ + echo "+CARGO: sweeping target cache"; \ cargo sweep -r -t $EARTHLY_SWEEP_DAYS; \ cargo sweep -r -i; \ + echo "+CARGO: copying output"; \ $EARTHLY_FUNCTIONS_HOME/copy-output.sh "$output"; RUN $EARTHLY_FUNCTIONS_HOME/rename-output.sh "$output" From d24d25c5a9721907ecdfb23d6f3d19a4e49a8c35 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Mon, 19 Aug 2024 13:56:52 +0100 Subject: [PATCH 3/3] rust: Replace `keep_fingerprints` logic with unconditional `cargo clean` Due to Earthly's layer cache, code added with `COPY` (even with `--keep-ts`) can end up with timestamps (`mtime`) corresponding to the point of creation of the cache entry, not the current time. However, on a following build the `target` mount cache may contain builds from other branches, with different code for those dependencies, which have a newer `mtime`. 
In this case `cargo` will think it can use the cached dependency instead of rebuilding (because the code appears older than the cached entry under `target`). Avoid this by using `cargo clean` to remove the build artifacts for any local crate. This should become unnecessary with https://github.com/rust-lang/cargo/issues/14136 This replaces the old behaviour of removing the fingerprints directory. Using `cargo clean` uses a proper cargo API rather than relying on implementation details like where the fingerprints live and what the consequence of removing them is. It may also keep the cached data smaller since it removes the build artifacts which will likely never be reused due to the lack of a fingerprint. Note that the previous fingerprint cleaning was subject to a race where a different parallel build could reintroduce some fingerprints between `DO +REMOVE_SOURCE_FINGERPRINTS` and the `RUN ... cargo $args`. For that reason the calls to `cargo clean` here are made within the same `RUN` command so that the target cache remains locked. By switching to `cargo metadata` the requirement for `tomljson` is removed. --- rust/Earthfile | 42 ++++-------------------------------------- rust/README.md | 6 ------ 2 files changed, 4 insertions(+), 44 deletions(-) diff --git a/rust/Earthfile b/rust/Earthfile index 2766c3b..71f0db6 100644 --- a/rust/Earthfile +++ b/rust/Earthfile @@ -2,7 +2,6 @@ VERSION 0.8 # INIT sets some configuration in the environment (used by following functions), and installs required dependencies. # Arguments: # - cache_prefix: Overrides cache prefix for cache IDS. Its value is exported to the build environment under the entry: $EARTHLY_CACHE_PREFIX. By default ${EARTHLY_TARGET_PROJECT_NO_TAG}#${OS_RELEASE}#earthly-cargo-cache -# - keep_fingerprints (false): Instructs the following +CARGO calls to not remove the Cargo fingerprints of the source packages. Use only when source packages have been COPYed with --keep-ts option. 
# - sweep_days (4): +CARGO uses cargo-sweep to clean build artifacts that haven't been accessed for this number of days. INIT: FUNCTION @@ -17,6 +16,7 @@ INIT: ENV PATH="$PATH:$CARGO_HOME/bin" END DO +INSTALL_CARGO_SWEEP + COPY +get-jq/jq /tmp/jq # $EARTHLY_CACHE_PREFIX ARG EARTHLY_TARGET_PROJECT_NO_TAG #https://docs.earthly.dev/docs/earthfile/builtin-args @@ -24,10 +24,6 @@ INIT: ARG cache_prefix="${EARTHLY_TARGET_PROJECT_NO_TAG}#${OS_RELEASE}#earthly-cargo-cache" ENV EARTHLY_CACHE_PREFIX=$cache_prefix - # $EARTHLY_KEEP_FINGERPRINTS - ARG keep_fingerprints=false - ENV EARTHLY_KEEP_FINGERPRINTS=$keep_fingerprints - # $EARTHLY_SWEEP_DAYS ARG sweep_days=4 ENV EARTHLY_SWEEP_DAYS=$sweep_days @@ -56,9 +52,6 @@ CARGO: ARG --required args ARG output DO +SET_CACHE_MOUNTS_ENV - IF [ "$EARTHLY_KEEP_FINGERPRINTS" = "false" ] - DO +REMOVE_SOURCE_FINGERPRINTS - END RUN --mount=$EARTHLY_RUST_CARGO_HOME_CACHE --mount=$EARTHLY_RUST_TARGET_CACHE \ set -e; \ echo "+CARGO: cargo $args"; \ @@ -67,7 +60,9 @@ CARGO: cargo sweep -r -t $EARTHLY_SWEEP_DAYS; \ cargo sweep -r -i; \ echo "+CARGO: copying output"; \ - $EARTHLY_FUNCTIONS_HOME/copy-output.sh "$output"; + $EARTHLY_FUNCTIONS_HOME/copy-output.sh "$output"; \ + echo "+CARGO: removing local crates from target cache"; \ + cargo metadata --format-version=1 --no-deps | /tmp/jq -r '.packages[].name' | xargs -I{} cargo clean -p {}; RUN $EARTHLY_FUNCTIONS_HOME/rename-output.sh "$output" # SET_CACHE_MOUNTS_ENV sets the following entries in the environment, to be used to mount the cargo caches. @@ -138,15 +133,6 @@ get-cross: RUN wget -nv -O- "https://github.com/cross-rs/cross/releases/download/v${version}/cross-x86_64-unknown-linux-musl.tar.gz" | tar -xzf - -C . 
SAVE ARTIFACT cross -get-tomljson: - FROM alpine:3.18.3 - ARG USERARCH - ARG version=2.1.0 - RUN wget -O tomljson.tar.xz https://github.com/pelletier/go-toml/releases/download/v${version}/tomljson_${version}_linux_${USERARCH}.tar.xz && \ - tar -xf tomljson.tar.xz; \ - chmod +x tomljson - SAVE ARTIFACT tomljson - get-jq: FROM alpine:3.18.3 ARG USERARCH @@ -192,26 +178,6 @@ INSTALL_EARTHLY_FUNCTIONS: chmod +x $EARTHLY_FUNCTIONS_HOME/rename-output.sh; \ fi; -REMOVE_SOURCE_FINGERPRINTS: - FUNCTION - DO +CHECK_INITED - COPY +get-tomljson/tomljson /tmp/tomljson - COPY +get-jq/jq /tmp/jq - RUN if [ ! -n "$EARTHLY_RUST_TARGET_CACHE" ]; then \ - echo "+SET_CACHE_MOUNTS_ENV has not been called yet in this build environment" ; \ - exit 1; \ - fi; - RUN --mount=$EARTHLY_RUST_TARGET_CACHE \ - set -e;\ - source_libs=$(find . -name Cargo.toml -exec bash -c '/tmp/tomljson {} | /tmp/jq -r .package.name; printf "\n"' \;) ; \ - fingerprint_folders=$(find target -name .fingerprint) ; \ - for fingerprint_folder in $fingerprint_folders; do \ - cd $fingerprint_folder; \ - for source_lib in $source_libs; do \ - find . -maxdepth 1 -regex "\./$source_lib-[^-]+" -exec bash -c 'echo "deleting $(readlink -f {})"; rm -rf {}' \; ; \ - done \ - done; - CHECK_INITED: FUNCTION RUN if [ ! -n "$EARTHLY_CACHE_PREFIX" ]; then \ diff --git a/rust/README.md b/rust/README.md index 689a3b4..47f20a9 100644 --- a/rust/README.md +++ b/rust/README.md @@ -29,12 +29,6 @@ DO rust+INIT ... Overrides cache prefix for cache IDS. Its value is exported to the build environment under the entry: `$EARTHLY_CACHE_PREFIX`. 
By default `${EARTHLY_TARGET_PROJECT_NO_TAG}#${OS_RELEASE}#earthly-cargo-cache` -#### `keep_fingerprints (false)` - -By default `+CARGO` removes the [compiler fingerprints](https://doc.rust-lang.org/nightly/nightly-rustc/cargo/core/compiler/fingerprint/struct.Fingerprint.html) of those packages found in your source code (not their dependencies), to force their recompilation and work even when the Earthly `COPY` commands overwrote file mtimes (by default). - -Set `keep_fingerprints=true` to keep the source packages fingerprints and avoid their recompilation, when source packages have been copied with `--keep-ts `option. - #### `sweep_days (4)` `+CARGO` calls use cargo-sweep to clean build artifacts that haven't been accessed for this number of days.