Skip to content
This repository was archived by the owner on Mar 2, 2022. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions launch_minishogi_clients.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Launching clients.
#
# Looks up the node on which the current user's running server job (a squeue
# line matching "pg" and "ser") is placed, then submits six client job arrays
# that connect to tcp://<node>:10023: five on the `learnfair` partition and
# one on the `uninterrupted` partition.
#
# Exit status: 1 if no server node could be determined or if any sbatch
# submission failed, 0 otherwise.

me=$(whoami)
export me

# Drop pending (PD) jobs, keep the server job line(s), strip everything in
# front of the "learnfair..." node name and remove the padding spaces.
# `sort -u | head -n 1` guarantees a single line even when several matching
# jobs are listed, so the value is always usable inside a URL.
host=$(squeue -u "$me" | grep -iv pd | grep -i pg | grep ser \
  | sed 's/.*learnfair/learnfair/g' | sed 's/ //g' | sort -u | head -n 1)
export host
echo "host=<${host}>"

# Without a valid node name the clients would be submitted with a broken
# server address and burn their allocation doing nothing useful.
case "$host" in
  learnfair*) ;;
  *)
    echo "error: no running server job found for user '${me}' (host=<${host}>)" >&2
    exit 1
    ;;
esac

# Command executed by every array task.  \$SLURM_JOB_ID is escaped on purpose:
# it must be expanded inside the job, not at submission time.
client_cmd="python -u -m pypolygames train --max_time=259200 --saving_period=4 --num_game 120 --per_thread_batchsize 192 --device cuda:0 cuda:0 cuda:1 cuda:2 cuda:3 cuda:4 cuda:5 cuda:6 cuda:7 --nnks 3 --epoch_len 256 --batchsize 396 --sync_period 256 --num_rollouts 600 --replay_capacity 20000 --replay_warmup 2000 --do_not_save_replay_buffer --ddp true --checkpoint_dir \"exps/yaclient_\$SLURM_JOB_ID\" --out_feature --game_name minishogi --model_name ResConvConvLogitPoolModel --turn_features --bn --nnks 3 --nnsize 8 --history 2 --nb_layers_per_net 6 --nb_nets 31 --bn --server_connect_hostname tcp://$host:10023 --num_game 20 "

# Submit one client job array (280 tasks, at most 20 running at once).
#   $1 - Slurm partition to submit to
submit_clients() {
  sbatch --array=0-279%20 --comment=notenough --partition="$1" \
    --time=72:00:00 --mem=150Go --job-name=polytrain --gres=gpu:8 \
    --cpus-per-task=80 --wrap="$client_cmd"
}

# Submissions run in the background so they proceed in parallel; their PIDs
# are collected so that failures can be reported instead of silently lost.
pids=""
for batch in 1 2 3 4 5; do
  submit_clients learnfair &
  pids="$pids $!"
done
submit_clients uninterrupted &
pids="$pids $!"

status=0
for pid in $pids; do
  wait "$pid" || status=1
done
if [ "$status" -ne 0 ]; then
  echo "error: at least one client submission failed" >&2
fi
exit "$status"

5 changes: 5 additions & 0 deletions launch_minishogi_server.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Runs the minishogi training server (listening on tcp://*:10023) once per
# batch size, from 4096 down to 64.  The runs are sequential: each one starts
# only after the previous python process has exited, and all of them use the
# same checkpoint directory exps/minishogi.
for batch_size in 4096 2048 1024 512 256 128 64; do
  # On H2, uncomment the next line so that it prefixes the python command:
  # LD_PRELOAD=/private/home/vegardmella/libjemalloc.so.1 \
  python -u -m pypolygames train --max_time=259200 --saving_period=4 \
    --num_game 40 --per_thread_batchsize 12 \
    --device cuda:0 cuda:0 cuda:1 cuda:2 cuda:3 cuda:4 cuda:5 cuda:6 cuda:7 \
    --epoch_len 256 --batchsize "$batch_size" --sync_period 32 \
    --num_rollouts 600 --replay_capacity 100000 --replay_warmup 9000 \
    --do_not_save_replay_buffer --ddp true --checkpoint_dir exps/minishogi \
    --out_feature --game_name minishogi \
    --model_name ResConvConvLogitPoolModel --turn_features --bn --nnks 3 \
    --history 2 --nnsize 8 --nb_nets 31 --nb_layers_per_net 6 --nnks 3 \
    --server_listen_endpoint 'tcp://*:10023' --num_game 0 --lr 1e-5
  # The endpoint is quoted so the shell never treats the `*` as a glob.
done
19 changes: 19 additions & 0 deletions meta_minishogi.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Launch this script, wait 10 days, and you should get an excellent minishogi model.
#
# Steps:
#   1. Submit a first server job (pgserver) and give Slurm 200 s to place it.
#   2. Read the node it was placed on from squeue.
#   3. Submit four more server jobs (pgser2..pgser5) pinned to that same node
#      with -w, 60 s apart.
#   4. Launch the clients three times, 24 h apart.
#
# Exit status: 1 if the first server job cannot be submitted or its node
# cannot be determined.

me=$(whoami)
export me

sbatch --partition=dev --time=72:00:00 --mem=150Go --job-name=pgserver \
  --gres=gpu:8 --cpus-per-task=80 --wrap="./launch_minishogi_server.sh" \
  || { echo "error: could not submit the pgserver job" >&2; exit 1; }
sleep 200

# Keep the pgserver line, strip everything in front of the "learnfair..." node
# name and remove the padding spaces; `sort -u | head -n 1` keeps the value
# single-line even if several pgserver jobs are listed.
host=$(squeue -u "$me" | grep -i pgserver \
  | sed 's/.*learnfair/learnfair/g' | sed 's/ //g' | sort -u | head -n 1)
export host
echo "host=<${host}>"

# An empty $host would make `sbatch -w` swallow the next option as its node
# list, and a job that is still pending yields a line without any node name.
case "$host" in
  learnfair*) ;;
  *)
    echo "error: pgserver is not running on a learnfair node yet (host=<${host}>)" >&2
    exit 1
    ;;
esac

# Follow-up server jobs on the same node, with a 60 s pause between (not
# after) submissions.
pause=0
for job_name in pgser2 pgser3 pgser4 pgser5; do
  if [ "$pause" -eq 1 ]; then
    sleep 60
  fi
  sbatch -w "$host" --partition=dev --time=72:00:00 --mem=150Go \
    --job-name="$job_name" --gres=gpu:8 --cpus-per-task=80 \
    --wrap="./launch_minishogi_server.sh"
  pause=1
done

# Three waves of clients, one day (86400 s) apart.
./launch_minishogi_clients.sh
sleep 86400
./launch_minishogi_clients.sh
sleep 86400
./launch_minishogi_clients.sh