Skip to content

Commit 00de2c0

Browse files
committed
fix: condaenv package dependency
1 parent 23a9d7b commit 00de2c0

File tree

2 files changed

+22
-6
lines changed

2 files changed

+22
-6
lines changed

rdagent/scenarios/finetune/conda/llm_finetune_requirements.txt

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,9 @@
22
# Equivalent to: rdagent/scenarios/finetune/docker/llm_finetune_docker/Dockerfile
33
# Docker base: hiyouga/llamafactory:0.9.4 uses PyTorch 2.6.0 + CUDA 12.4 + flash-attn 2.7.4
44

5-
# PyTorch 2.6.0 with CUDA 12.4 (matches Docker base image)
6-
--index-url https://download.pytorch.org/whl/cu124
5+
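# PyTorch 2.6.0 with CUDA 12.8 wheels (for B200 GPUs; the Docker base image above uses CUDA 12.4)
# NOTE(review): confirm that torch==2.6.0 / torchvision==0.21.0 are published on the cu128 index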
6+
# Note: on CUDA 12.4 machines, change the --index-url below to https://download.pytorch.org/whl/cu124
7+
--index-url https://download.pytorch.org/whl/cu128
78
torch==2.6.0
89
torchvision==0.21.0
910

@@ -13,8 +14,8 @@ torchvision==0.21.0
1314
# Core LlamaFactory package (PyPI latest is 0.9.3, Docker uses 0.9.4 from GitHub)
1415
llamafactory==0.9.3
1516

16-
# FlashAttention-2 (matches Docker base image version)
17-
flash-attn==2.7.4
17+
# FlashAttention-2: installed separately by FTCondaEnv.prepare() in rdagent/utils/env.py
18+
# (requires torch installed first, and --no-build-isolation flag)
1819

1920
# Additional dependencies (matches Dockerfile line 17)
2021
bitsandbytes>=0.39.0

rdagent/utils/env.py

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -835,10 +835,25 @@ class FTCondaEnv(LocalEnv[FTCondaConf]):
835835

836836
def prepare(self) -> None:
837837
try:
838+
env_name = self.conf.conda_env_name
839+
840+
# Skip if already prepared
841+
if env_name in _CONDA_ENV_PREPARED:
842+
return
843+
844+
# Step 1: Install base dependencies (torch, llamafactory, etc.)
838845
req_file = FT_CONDA_CONFIG_DIR / "llm_finetune_requirements.txt"
839-
_prepare_conda_env(self.conf.conda_env_name, req_file)
846+
_prepare_conda_env(env_name, req_file)
847+
848+
# Step 2: Install flash-attn (requires torch first, uses --no-build-isolation)
849+
# --no-cache-dir: avoid cross-filesystem hardlink error when /tmp and ~/.cache/pip are on different mounts
850+
print("[yellow]Installing flash-attn (compiling, may take a few minutes)...[/yellow]")
851+
subprocess.check_call(
852+
f"conda run -n {env_name} pip install 'flash-attn>=2.5.6,<=2.7.4' --no-build-isolation --no-cache-dir",
853+
shell=True,
854+
)
855+
840856
# Re-update bin_path after prepare() in case the conda env was just created
841-
# This fixes the issue where bin_path is empty if queried before env exists
842857
if not self.conf.bin_path:
843858
self.conf._update_bin_path()
844859
except Exception as e:

0 commit comments

Comments
 (0)