diff --git a/build_tools/rocm/run_xla.sh b/build_tools/rocm/run_xla.sh
index a58f07f04b9e5..89a469ef1c679 100755
--- a/build_tools/rocm/run_xla.sh
+++ b/build_tools/rocm/run_xla.sh
@@ -124,6 +124,18 @@ elif [[ $1 == "tsan" ]]; then
     shift
 fi
 
+clean_up() {
+    # EXIT-trap handler, step 1: clean up nccl- files left in /dev/shm
+    rm -rf /dev/shm/nccl-*
+
+    # step 2: clean up bazel disk_cache (BAZEL_DISK_CACHE_SIZE wins, else 100G)
+    bazel shutdown \
+        --disk_cache="${BAZEL_DISK_CACHE_DIR}" \
+        --experimental_disk_cache_gc_max_size="${BAZEL_DISK_CACHE_SIZE:-100G}"
+}
+
+trap clean_up EXIT
+
 bazel --bazelrc=build_tools/rocm/rocm_xla.bazelrc test \
     --config=rocm_ci \
     --config=xla_sgpu \
@@ -147,8 +159,3 @@ bazel --bazelrc=build_tools/rocm/rocm_xla.bazelrc test \
     --test_filter=-$(IFS=: ; echo "${EXCLUDED_TESTS[*]}") \
     "${SANITIZER_ARGS[@]}" \
     "$@"
-
-# clean up bazel disk_cache
-bazel shutdown \
-    --disk_cache=${BAZEL_DISK_CACHE_DIR} \
-    --experimental_disk_cache_gc_max_size=${BAZEL_DISK_CACHE_SIZE}
diff --git a/build_tools/rocm/run_xla_ci_build.sh b/build_tools/rocm/run_xla_ci_build.sh
index 3c08930cb33c1..c07c14fa1fc06 100755
--- a/build_tools/rocm/run_xla_ci_build.sh
+++ b/build_tools/rocm/run_xla_ci_build.sh
@@ -36,6 +36,18 @@ for arg in "$@"; do
     fi
 done
 
+clean_up() {
+    # EXIT-trap handler, step 1: clean up nccl- files left in /dev/shm
+    rm -rf /dev/shm/nccl-*
+
+    # step 2: clean up bazel disk_cache (BAZEL_DISK_CACHE_SIZE wins, else 100G)
+    bazel shutdown \
+        --disk_cache="${BAZEL_DISK_CACHE_DIR}" \
+        --experimental_disk_cache_gc_max_size="${BAZEL_DISK_CACHE_SIZE:-100G}"
+}
+
+trap clean_up EXIT
+
 bazel --bazelrc="$SCRIPT_DIR/rocm_xla.bazelrc" test \
     --config=rocm_rbe \
     --disk_cache=${BAZEL_DISK_CACHE_DIR} \
diff --git a/build_tools/rocm/run_xla_multi_gpu.sh b/build_tools/rocm/run_xla_multi_gpu.sh
index 81a45711bbb3c..f5582a7612830 100755
--- a/build_tools/rocm/run_xla_multi_gpu.sh
+++ b/build_tools/rocm/run_xla_multi_gpu.sh
@@ -102,6 +102,18 @@ elif [[ $1 == "tsan" ]]; then
     shift
 fi
 
+clean_up() {
+    # EXIT-trap handler, step 1: clean up nccl- files left in /dev/shm
+    rm -rf /dev/shm/nccl-*
+
+    # step 2: clean up bazel disk_cache (BAZEL_DISK_CACHE_SIZE wins, else 100G)
+    bazel shutdown \
+        --disk_cache="${BAZEL_DISK_CACHE_DIR}" \
+        --experimental_disk_cache_gc_max_size="${BAZEL_DISK_CACHE_SIZE:-100G}"
+}
+
+trap clean_up EXIT
+
 bazel --bazelrc=build_tools/rocm/rocm_xla.bazelrc test \
     --config=rocm_ci \
     --config=xla_mgpu \
@@ -124,8 +136,3 @@ bazel --bazelrc=build_tools/rocm/rocm_xla.bazelrc test \
     "${SANITIZER_ARGS[@]}" \
     "$@" \
     --strategy=TestRunner=local # execute multigpu tests locally as there is no gpu exclusive protection on rbe
-
-# clean up bazel disk_cache
-bazel shutdown \
-    --disk_cache=${BAZEL_DISK_CACHE_DIR} \
-    --experimental_disk_cache_gc_max_size=${BAZEL_DISK_CACHE_SIZE}