Binary file modified experiments/league.db
Binary file not shown.
Binary file added experiments/league.db.backup
Binary file not shown.
9 changes: 9 additions & 0 deletions experiments/league.temp.csv
@@ -0,0 +1,9 @@
name,mu,sigma,trueskill
POWorkerRush,38.861939273926204,3.2121311873637186,29.225545711835046
POLightRush,25.754744223814487,2.2179711777256155,19.100830690637643
POHeavyRush,22.98870034666808,2.212164591404348,16.352206572455035
models/MicroRTSGridModeVecEnv__ppo_gridnet__1__1641331302/6144.pt,21.481666441292322,2.812293923609439,13.044784670464004
models/MicroRTSGridModeVecEnv__ppo_gridnet__1__1641330893/6144.pt,16.10799174066183,5.348608643642864,0.062165809733237154
models/MicroRTSGridModeVecEnv__ppo_gridnet__1__1641330984/6144.pt,25.0,8.333333333333334,0.0
models/MicroRTSGridModeVecEnv__ppo_gridnet__1__1641331138/6144.pt,25.0,8.333333333333334,0.0
PORangedRush,8.881771566353699,3.3669435969510504,-1.2190592244994534
86 changes: 47 additions & 39 deletions experiments/new_league.py
@@ -56,6 +56,8 @@ def parse_args():
help='if toggled, the database will be updated')
parser.add_argument('--cuda', type=lambda x: bool(strtobool(x)), default=True, nargs='?', const=True,
help='if toggled, cuda will be enabled by default')
parser.add_argument('--maps', nargs='+', default=["maps/16x16/basesWorkers16x16B.xml","maps/16x16/basesWorkers16x16C.xml","maps/16x16/basesWorkers16x16D.xml", "maps/16x16/basesWorkers16x16E.xml", "maps/16x16/basesWorkers16x16F.xml"], # [],
Collaborator: The map-related changes should be incorporated into master.
help='the maps')
# ["randomBiasedAI","workerRushAI","lightRushAI","coacAI"]
# default=["randomBiasedAI","workerRushAI","lightRushAI","coacAI","randomAI","passiveAI","naiveMCTSAI","mixedBot","rojo","izanagi","tiamat","droplet","guidedRojoA3N"]
args = parser.parse_args()
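
One guard worth noting (a sketch, not part of the diff): the case-1 loop further down plays num_matches // 2 games per pairing and indexes args.maps[match] directly, so the --maps list must supply at least that many entries.

# Sketch: fail fast if --maps cannot cover the scheduled games.
# Assumes args.maps and args.num_matches as parsed above.
games_per_pairing = args.num_matches // 2
if len(args.maps) < games_per_pairing:
    raise ValueError(
        f"--maps provides {len(args.maps)} maps but {games_per_pairing} games are scheduled; "
        "pass one map per game or reduce --num-matches"
    )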
@@ -66,6 +68,8 @@ def parse_args():
dbname = "league"
if(args.partial_obs):
dbname = 'po_league'

print(dbname)
db = SqliteDatabase(f"{dbname}.db")
class BaseModel(Model):
class Meta:
@@ -95,7 +99,7 @@ class Outcome(Enum):
LOSS = -1

class Match:
def __init__(self, partial_obs: bool, match_up=None):
def __init__(self, partial_obs: bool, match_up=None, map_path="maps/16x16/basesWorkers16x16A.xml"):
Collaborator: The map-related changes should be incorporated into master.
# mode 0: rl-ai vs built-in-ai
# mode 1: rl-ai vs rl-ai
# mode 2: built-in-ai vs built-in-ai
@@ -104,6 +108,7 @@ def __init__(self, partial_obs: bool, match_up=None):
built_in_ais2=None
rl_ai=None
rl_ai2=None
self.map_path = map_path
Collaborator: The map-related changes should be incorporated into master.

# determine mode
rl_ais = []
@@ -143,14 +148,15 @@ def __init__(self, partial_obs: bool, match_up=None):
self.device = torch.device("cuda" if torch.cuda.is_available() and args.cuda else "cpu")
max_steps = 5000
if mode == 0:
print(partial_obs)
self.envs = MicroRTSGridModeVecEnv(
num_bot_envs=len(built_in_ais),
num_selfplay_envs=0,
partial_obs=partial_obs,
max_steps=max_steps,
render_theme=2,
ai2s=built_in_ais,
map_paths=["maps/16x16/basesWorkers16x16A.xml"],
Collaborator: The map-related changes should be incorporated into master.
map_paths=[map_path],
reward_weight=np.array([10.0, 1.0, 1.0, 0.2, 1.0, 4.0]),
)
self.agent = Agent(self.envs).to(self.device)
@@ -163,7 +169,7 @@ def __init__(self, partial_obs: bool, match_up=None):
partial_obs=partial_obs,
max_steps=max_steps,
render_theme=2,
map_paths=["maps/16x16/basesWorkers16x16A.xml"],
map_paths=[map_path],
reward_weight=np.array([10.0, 1.0, 1.0, 0.2, 1.0, 4.0]),
)
self.agent = Agent(self.envs).to(self.device)
@@ -178,7 +184,7 @@ def __init__(self, partial_obs: bool, match_up=None):
ai2s=built_in_ais2,
max_steps=max_steps,
render_theme=2,
map_paths=["maps/16x16/basesWorkers16x16.xml"],
map_paths=[map_path],
reward_weight=np.array([10.0, 1.0, 1.0, 0.2, 1.0, 4.0])
)
self.envs = MicroRTSStatsRecorder(self.envs)
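
All three modes now construct their env from the per-match map. As a usage sketch (the match_up format, a pair of AI names, and the built-in AI names are assumptions based on the evaluation loop and the defaults elsewhere in this script):

# Usage sketch only: one game between two built-in AIs on an explicit map.
m = Match(partial_obs=False,
          match_up=["workerRushAI", "lightRushAI"],      # assumed: two AI names, as in the loop below
          map_path="maps/16x16/basesWorkers16x16B.xml")  # any map; defaults to basesWorkers16x16A.xml
for outcome in m.run(1):                                 # outcomes are compared against Outcome values below
    print(outcome)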
@@ -339,41 +345,43 @@ def get_leaderboard_existing_ais(existing_ai_names):
if idx == 0:
match_up = list(reversed(match_up))

m = Match(args.partial_obs, match_up)
challenger = AI.get_or_none(name=m.p0)
defender = AI.get_or_none(name=m.p1)

r = m.run(args.num_matches // 2)
for item in r:
drawn = False
if item == Outcome.WIN.value:
winner = challenger
loser = defender
elif item == Outcome.DRAW.value:
drawn = True
else:
winner = defender
loser = challenger

print(f"{winner.name} {'draws' if drawn else 'wins'} {loser.name}")

winner_rating, loser_rating = rate_1vs1(
Rating(winner.mu, winner.sigma),
Rating(loser.mu, loser.sigma),
drawn=drawn)

winner.mu, winner.sigma = winner_rating.mu, winner_rating.sigma
loser.mu, loser.sigma = loser_rating.mu, loser_rating.sigma
winner.save()
loser.save()

for match in range(args.num_matches // 2):
m = Match(args.partial_obs, match_up, args.maps[match])
challenger = AI.get_or_none(name=m.p0)
defender = AI.get_or_none(name=m.p1)

MatchHistory(
challenger=challenger,
defender=defender,
win=int(item == 1),
draw=int(item == 0),
loss=int(item == -1),
).save()
r = m.run(1)
for item in r:
drawn = False
if item == Outcome.WIN.value:
winner = challenger
loser = defender
elif item == Outcome.DRAW.value:
drawn = True
else:
winner = defender
loser = challenger

print(f"{winner.name} {'draws' if drawn else 'wins'} {loser.name}")

winner_rating, loser_rating = rate_1vs1(
Rating(winner.mu, winner.sigma),
Rating(loser.mu, loser.sigma),
drawn=drawn)

winner.mu, winner.sigma = winner_rating.mu, winner_rating.sigma
loser.mu, loser.sigma = loser_rating.mu, loser_rating.sigma
winner.save()
loser.save()

MatchHistory(
challenger=challenger,
defender=defender,
win=int(item == 1),
draw=int(item == 0),
loss=int(item == -1),
).save()
get_leaderboard().to_csv(f"{dbname}.csv", index=False)

# case 2: new AIs
@@ -447,7 +455,7 @@ def binary_search(leaderboard, low, high, ai, n=5):
ai = AI.get(name=new_ai_name)
binary_search(leaderboard, 0, len(leaderboard), ai.name, n=5)

get_leaderboard().to_csv(f"{dbname}.temp.csv", index=False)
get_leaderboard().to_csv(f"league.temp.csv", index=False)

print("=======================")
print(get_leaderboard())
8 changes: 4 additions & 4 deletions experiments/po_league.csv
@@ -1,5 +1,5 @@
name,mu,sigma,trueskill
POWorkerRush,39.969639589352205,3.468613295987325,29.56379970139023
POLightRush,26.237614972853905,2.4859644240612635,18.779721700670116
POHeavyRush,21.253428741884882,2.4920725461958844,13.77721110329723
PORangedRush,8.351719772683518,3.0734987337830626,-0.8687764286656705
POWorkerRush,38.861939273926204,3.2121311873637186,29.225545711835046
POLightRush,25.754744223814487,2.2179711777256155,19.100830690637643
POHeavyRush,22.98870034666808,2.212164591404348,16.352206572455035
PORangedRush,8.881771566353699,3.3669435969510504,-1.2190592244994534
Binary file modified experiments/po_league.db
Binary file not shown.
9 changes: 5 additions & 4 deletions experiments/ppo_gridnet.py
@@ -280,7 +280,7 @@ def get_value(self, x):
+ [microrts_ai.randomBiasedAI for _ in range(min(args.num_bot_envs, 2))]
+ [microrts_ai.lightRushAI for _ in range(min(args.num_bot_envs, 2))]
+ [microrts_ai.workerRushAI for _ in range(min(args.num_bot_envs, 2))],
map_paths=["maps/16x16/basesWorkers16x16.xml"],
map_paths=["maps/16x16/basesWorkers16x16A.xml","maps/16x16/basesWorkers16x16A.xml","maps/16x16/basesWorkers16x16B.xml","maps/16x16/basesWorkers16x16B.xml","maps/16x16/basesWorkers16x16C.xml","maps/16x16/basesWorkers16x16C.xml","maps/16x16/basesWorkers16x16D.xml","maps/16x16/basesWorkers16x16D.xml","maps/16x16/basesWorkers16x16E.xml","maps/16x16/basesWorkers16x16E.xml","maps/16x16/basesWorkers16x16F.xml","maps/16x16/basesWorkers16x16F.xml","maps/16x16/basesWorkers16x16G.xml","maps/16x16/basesWorkers16x16G.xml","maps/16x16/basesWorkers16x16H.xml","maps/16x16/basesWorkers16x16H.xml","maps/16x16/basesWorkers16x16I.xml","maps/16x16/basesWorkers16x16I.xml","maps/16x16/basesWorkers16x16J.xml","maps/16x16/basesWorkers16x16J.xml","maps/16x16/basesWorkers16x16K.xml","maps/16x16/basesWorkers16x16K.xml","maps/16x16/basesWorkers16x16L.xml","maps/16x16/basesWorkers16x16L.xml"],
reward_weight=np.array([10.0, 1.0, 1.0, 0.2, 1.0, 4.0]),
)
envs = MicroRTSStatsRecorder(envs)
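
The 24-entry map_paths list above assigns each of the twelve basesWorkers16x16A-L maps to two consecutive bot envs. If that pairing is the intent, the list can be generated instead of hand-written (a sketch, not part of the diff):

# Sketch: equivalent construction of the hardcoded list above, two envs per map, maps A through L.
map_paths = [
    f"maps/16x16/basesWorkers16x16{letter}.xml"
    for letter in "ABCDEFGHIJKL"
    for _ in range(2)
]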
@@ -340,8 +340,8 @@ def get_value(self, x):

## EVALUATION LOGIC:
eval_queue = []
trueskill_df = pd.read_csv("league.csv")
trueskill_step_df = pd.read_csv("league.csv")
trueskill_df = pd.read_csv("po_league.csv")
trueskill_step_df = pd.read_csv("po_league.csv")
Collaborator: We shouldn't have to worry about this anymore because the new script contains an output path for the CSVs: https://github.com/vwxyzjn/gym-microrts/blob/3d7a42f46efbd39a0b806388b8a445fbee48d00f/experiments/ppo_gridnet.py#L240
trueskill_step_df["type"] = trueskill_step_df["name"]
trueskill_step_df["step"] = 0
preset_trueskill_step_df = trueskill_step_df.copy()
@@ -476,7 +476,7 @@ def get_value(self, x):
torch.save(agent.state_dict(), f"models/{experiment_name}/agent.pt")
torch.save(agent.state_dict(), f"models/{experiment_name}/{global_step}.pt")
wandb.save(f"models/{experiment_name}/agent.pt", base_path=f"models/{experiment_name}", policy="now")
subprocess.Popen(["python", "new_league.py", "--evals", f"models/{experiment_name}/{global_step}.pt", "--update-db", "false"])
subprocess.Popen(["python", "new_league.py", "--evals", f"models/{experiment_name}/{global_step}.pt", "--update-db", "false", "--partial-obs", str(args.partial_obs)])
Collaborator: This should be needed
eval_queue += [f"models/{experiment_name}/{global_step}.pt"]
print(f"Evaluating models/{experiment_name}/{global_step}.pt")

@@ -500,6 +500,7 @@ def get_value(self, x):
"trueskill": league.loc[model_path]["trueskill"]
}
trueskill_df = trueskill_df.append(trueskill_data, ignore_index=True)
print(trueskill_df)
wandb.log({"trueskill": wandb.Table(dataframe=trueskill_df)})
trueskill_data["type"] = "training"
trueskill_data["step"] = model_global_step