File tree: 2 files changed, +22 -6 lines changed
Original file line number | Diff line number | Diff line change
22# Equivalent to: rdagent/scenarios/finetune/docker/llm_finetune_docker/Dockerfile
33# Docker base: hiyouga/llamafactory:0.9.4 uses PyTorch 2.6.0 + CUDA 12.4 + flash-attn 2.7.4
44
5- # PyTorch 2.6.0 with CUDA 12.4 (matches Docker base image)
6- --index-url https://download.pytorch.org/whl/cu124
5+ # PyTorch 2.6.0 with CUDA 12.8 (for B200 GPUs)
6+ # Note: Change to cu124 for CUDA 12.4 machines
7+ --index-url https://download.pytorch.org/whl/cu128
78torch==2.6.0
89torchvision==0.21.0
910
@@ -13,8 +14,8 @@ torchvision==0.21.0
1314# Core LlamaFactory package (PyPI latest is 0.9.3, Docker uses 0.9.4 from GitHub)
1415llamafactory==0.9.3
1516
16- # FlashAttention-2 (matches Docker base image version)
17- flash-attn==2.7.4
17+ # FlashAttention-2: installed separately via llm_finetune_flash_attn.txt
18+ # (requires torch installed first, and --no-build-isolation flag)
1819
1920# Additional dependencies (matches Dockerfile line 17)
2021bitsandbytes>=0.39.0
Original file line number | Diff line number | Diff line change
@@ -835,10 +835,25 @@ class FTCondaEnv(LocalEnv[FTCondaConf]):
835835
836836 def prepare (self ) -> None :
837837 try :
838+ env_name = self .conf .conda_env_name
839+
840+ # Skip if already prepared
841+ if env_name in _CONDA_ENV_PREPARED :
842+ return
843+
844+ # Step 1: Install base dependencies (torch, llamafactory, etc.)
838845 req_file = FT_CONDA_CONFIG_DIR / "llm_finetune_requirements.txt"
839- _prepare_conda_env (self .conf .conda_env_name , req_file )
846+ _prepare_conda_env (env_name , req_file )
847+
848+ # Step 2: Install flash-attn (requires torch first, uses --no-build-isolation)
849+ # --no-cache-dir: avoid cross-filesystem hardlink error when /tmp and ~/.cache/pip are on different mounts
850+ print ("[yellow]Installing flash-attn (compiling, may take a few minutes)...[/yellow]" )
851+ subprocess .check_call (
852+ f"conda run -n { env_name } pip install 'flash-attn>=2.5.6,<=2.7.4' --no-build-isolation --no-cache-dir" ,
853+ shell = True ,
854+ )
855+
840856 # Re-update bin_path after prepare() in case the conda env was just created
841- # This fixes the issue where bin_path is empty if queried before env exists
842857 if not self .conf .bin_path :
843858 self .conf ._update_bin_path ()
844859 except Exception as e :
You can’t perform that action at this time.
0 commit comments