From ff4edd917f0660ad53daf791a3d4538fbd968784 Mon Sep 17 00:00:00 2001 From: hdnh2006 Date: Mon, 26 May 2025 18:25:40 +0200 Subject: [PATCH 1/6] Updated README for blackwell gpus and ssd + nvme config --- README.md | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index fcb03b5..0993e55 100644 --- a/README.md +++ b/README.md @@ -41,19 +41,17 @@ Paypal PayPal.Me/cryptolabsZA ## Host install guide for vast.ai ``` -#Start with a clean install of ubuntu 22.04.x HWE Kernal server. Just add openssh. sudo apt update && sudo apt upgrade -y && sudo apt dist-upgrade -y && sudo apt install update-manager-core -y #if you did not install HWE kernal do the following sudo apt install --install-recommends linux-generic-hwe-22.04 -y sudo reboot - #install the drivers. sudo apt install build-essential -y sudo add-apt-repository ppa:graphics-drivers/ppa -y sudo apt update # to search for available NVIDIA drivers: use this command sudo apt search nvidia-driver | grep nvidia-driver | sort -r -sudo apt install nvidia-driver-560 -y # assuming the latest is 560 +sudo apt install nvidia-driver-570 -y # assuming the latest is 570. Use nvidia-driver-570-open for Blackwell-architecture GPUs #Remove unattended-upgrades Package so that the dirver don't upgrade when you have clients sudo apt purge --auto-remove unattended-upgrades -y @@ -73,7 +71,8 @@ bash -c 'sudo apt-get update; sudo apt-get -y upgrade; sudo apt-get install -y l echo -e "n\n\n\n\n\n\nw\n" | sudo cfdisk /dev/nvme0n1 && sudo mkfs.xfs /dev/nvme0n1p1 sudo mkdir /var/lib/docker -#I added discard so that the ssd is trimeds by ubunut and nofail if there is some problem with the drive the system will still boot. +# I added discard so that the SSD is trimmed by Ubuntu, and nofail so that the system will still boot if there is some problem with the drive. +# You must ensure your NVMe disk is properly partitioned and formatted with the XFS file system. 
sudo bash -c 'uuid=$(sudo xfs_admin -lu /dev/nvme0n1p1 | sed -n "2p" | awk "{print \$NF}"); echo "UUID=$uuid /var/lib/docker/ xfs rw,auto,pquota,discard,nofail 0 0" >> /etc/fstab' sudo mount -a @@ -88,6 +87,9 @@ sudo bash -c '(crontab -l; echo "@reboot nvidia-smi -pm 1" ) | crontab -' sudo apt install python3 -y sudo wget https://console.vast.ai/install -O install; sudo python3 install YourKey; history -d $((HISTCMD-1)); +# Add the flag --no-partitioning if decided to boot from a ssd +sudo wget https://console.vast.ai/install -O install; sudo python3 install --no-partitioning; history -d $((HISTCMD-1)); + nano /etc/default/grub # find the GRUB_CMDLINE_LINUX="" and ensure it looks like this. GRUB_CMDLINE_LINUX="amd_iommu=on nvidia_drm.modeset=0 systemd.unified_cgroup_hierarchy=false" From 559cfc5b3f184ea7ef9408516e1ad94da7d70337 Mon Sep 17 00:00:00 2001 From: hdnh2006 Date: Mon, 26 May 2025 18:30:03 +0200 Subject: [PATCH 2/6] deleted line recovered --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 0993e55..4f2e6c8 100644 --- a/README.md +++ b/README.md @@ -41,6 +41,7 @@ Paypal PayPal.Me/cryptolabsZA ## Host install guide for vast.ai ``` +#Start with a clean install of ubuntu 22.04.x HWE Kernal server. Just add openssh. 
sudo apt update && sudo apt upgrade -y && sudo apt dist-upgrade -y && sudo apt install update-manager-core -y #if you did not install HWE kernal do the following sudo apt install --install-recommends linux-generic-hwe-22.04 -y From ea7f963c5c44239ec6a418e9dd15aa2e6b695a21 Mon Sep 17 00:00:00 2001 From: Henry Date: Mon, 29 Sep 2025 16:25:03 +0200 Subject: [PATCH 3/6] Update README.md --- README.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/README.md b/README.md index 4f2e6c8..6e4aa78 100644 --- a/README.md +++ b/README.md @@ -46,6 +46,11 @@ sudo apt update && sudo apt upgrade -y && sudo apt dist-upgrade -y && sudo apt i #if you did not install HWE kernal do the following sudo apt install --install-recommends linux-generic-hwe-22.04 -y sudo reboot + +# Expand disk if just 100GB are used +sudo lvextend -l +100%FREE /dev/ubuntu-vg/ubuntu-lv +sudo resize2fs /dev/ubuntu-vg/ubuntu-lv + #install the drivers. sudo apt install build-essential -y sudo add-apt-repository ppa:graphics-drivers/ppa -y From f151ec9daf08acb904ff5668308f7624df9394a7 Mon Sep 17 00:00:00 2001 From: Henry Date: Mon, 29 Sep 2025 16:45:10 +0200 Subject: [PATCH 4/6] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 6e4aa78..d2387b9 100644 --- a/README.md +++ b/README.md @@ -86,7 +86,7 @@ sudo mount -a # check that /dev/nvme0n1p1 is mounted to /var/lib/docker/ df -h -#this will enable Persistence mode on reboot so that the gpus can go to idle power when not used +#this will enable Persistence mode on reboot so that the gpus can go to idle power when not used. 
Normal: no crontab for root message is gotten sudo bash -c '(crontab -l; echo "@reboot nvidia-smi -pm 1" ) | crontab -' #run the install command for vast From 928b21204f852436432b40bf9863985934d934d5 Mon Sep 17 00:00:00 2001 From: Henry Date: Mon, 29 Sep 2025 16:49:22 +0200 Subject: [PATCH 5/6] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index d2387b9..bb68b54 100644 --- a/README.md +++ b/README.md @@ -89,8 +89,8 @@ df -h #this will enable Persistence mode on reboot so that the gpus can go to idle power when not used. Normal: no crontab for root message is gotten sudo bash -c '(crontab -l; echo "@reboot nvidia-smi -pm 1" ) | crontab -' -#run the install command for vast -sudo apt install python3 -y +#run the install command for vast. It will install docker as well +sudo apt install python3 -y # No need for modern Ubuntu distributions sudo wget https://console.vast.ai/install -O install; sudo python3 install YourKey; history -d $((HISTCMD-1)); # Add the flag --no-partitioning if decided to boot from a ssd From 5e13afb00d5a65a572e029b839628ad00571d2e2 Mon Sep 17 00:00:00 2001 From: Henry Date: Mon, 29 Sep 2025 17:06:04 +0200 Subject: [PATCH 6/6] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index bb68b54..ee8d678 100644 --- a/README.md +++ b/README.md @@ -89,7 +89,7 @@ df -h #this will enable Persistence mode on reboot so that the gpus can go to idle power when not used. Normal: no crontab for root message is gotten sudo bash -c '(crontab -l; echo "@reboot nvidia-smi -pm 1" ) | crontab -' -#run the install command for vast. It will install docker as well +#run the install command for vast. It will install docker as well. If you get an NVML error, reboot or check the solutions in this README. If the installer fails with some other error, run it again (several times if necessary). 
sudo apt install python3 -y # No need for modern Ubuntu distributions sudo wget https://console.vast.ai/install -O install; sudo python3 install YourKey; history -d $((HISTCMD-1));