%pylab inline
import torch
import sys, os
import pystk
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('device = ', device)
# Init supertuxkart: start from the HD graphics preset, shrink the rendered
# frame to 100x80, then hand the configuration to pystk.
config = pystk.GraphicsConfig.hd()
config.screen_width, config.screen_height = 100, 80
pystk.init(config)
def rollout(n_step=100):
    """Record a race on the 'lighthouse' track with player 0 set to AI_CONTROL.

    Args:
        n_step: Number of simulation steps to run and record.

    Returns:
        A list with one `(image, (steer, acceleration, brake))` tuple per
        step, where `image` is `render_data[0].image` converted to an ndarray
        and the action values are read from `last_action[0]` after the step.
    """
    race_config = pystk.RaceConfig()
    race_config.track = 'lighthouse'
    race_config.players[0].controller = pystk.PlayerConfig.Controller.AI_CONTROL

    race = pystk.Race(race_config)
    race.start()
    try:
        samples = []
        for _ in range(n_step):
            race.step()
            action = race.last_action[0]
            frame = np.array(race.render_data[0].image)
            controls = (action.steer, action.acceleration, action.brake)
            samples.append((frame, controls))
    finally:
        # Always stop the race and drop the reference, even when a step fails.
        race.stop()
        del race
    return samples
# Record a 100-step demonstration and print the action stored with its last sample.
data = rollout(100)
print(data[-1][1])
# Show the frames recorded at indices 10, 30, 50, 70 and 90, one figure each,
# with the axes hidden.
for i in range(5):
    figure()
    imshow(data[10 + 20 * i][0])
    axis('off')
class ActionNet(torch.nn.Module):
    """Convolutional network mapping an image batch to three outputs per image.

    `network` is four 5x5 convolutions (3->32->64->96->128 channels, the first
    three with stride 2), each followed by a ReLU. `classifier` is a linear
    layer from the 128 averaged feature channels to 3 values.
    """

    def __init__(self):
        super().__init__()
        # (output channels, stride) of each convolution, in order.
        conv_plan = ((32, 2), (64, 2), (96, 2), (128, 1))
        layers = []
        in_channels = 3
        for out_channels, stride in conv_plan:
            layers.append(torch.nn.Conv2d(in_channels, out_channels, 5, stride=stride))
            layers.append(torch.nn.ReLU())
            in_channels = out_channels
        self.network = torch.nn.Sequential(*layers)
        self.classifier = torch.nn.Linear(128, 3)

    def forward(self, x):
        """Return a (batch, 3) tensor for a (batch, 3, H, W) input `x`."""
        features = self.network(x)
        # Average over the two spatial dimensions, leaving (batch, 128).
        pooled = features.mean(dim=(2, 3))
        return self.classifier(pooled)
# Build an ActionNet instance and bind it to `actor`. The training code
# further down rebinds `actor` to a new network moved to `device`.
actor = ActionNet()
# Load the TensorBoard notebook extension so the %tensorboard magic is available.
%load_ext tensorboard
import tempfile
# Fresh temporary directory used as the TensorBoard log directory.
log_dir = tempfile.mkdtemp()
# Start TensorBoard inline on that directory with a reload interval of 1.
%tensorboard --logdir {log_dir} --reload_interval 1
import torch.utils.tensorboard as tb
import itertools
# Training hyper-parameters.
n_epochs = 10
batch_size = 128
# Scalars go to a sub-directory of `log_dir`, the directory TensorBoard was
# started on.
logger = tb.SummaryWriter(log_dir+'/actor2', flush_secs=1)
# Create the network
actor = ActionNet().to(device)
# Create the optimizer
optimizer = torch.optim.Adam(actor.parameters())
# Create the loss
loss = torch.nn.MSELoss()
# Demonstration data: 10 rollouts of 200 steps each, flattened into one list
# of (image, (steer, acceleration, brake)) samples.
train_data = list(itertools.chain.from_iterable(rollout(200) for _ in range(10)))
# Start training
global_step = 0
for epoch in range(n_epochs):
    # Uncomment this to enable DAgger (make sure to define rollout_agent first)
    # if epoch > 1:
    #     train_data.extend(rollout_agent(actor))
    np.random.shuffle(train_data)
    # Only full batches are used; a trailing partial batch is skipped.
    for iteration in range(0, len(train_data)-batch_size+1, batch_size):
        batch = train_data[iteration:iteration+batch_size]
        # Stack the frames into a single ndarray before handing them to torch:
        # building a tensor directly from a list of ndarrays is very slow and
        # triggers a PyTorch UserWarning.
        images = np.stack([image for image, _ in batch])
        # Move channels in front of the spatial dims and rescale by 1/255,
        # shifted by -0.5 (assumes frames are (H, W, 3) with values in 0..255
        # -- TODO confirm against pystk).
        batch_data = torch.as_tensor(images).permute(0,3,1,2).float()/255. - 0.5
        batch_label = torch.as_tensor([label for _, label in batch]).float()
        o = actor(batch_data.to(device))
        loss_val = loss(o, batch_label.to(device))
        logger.add_scalar('train/loss', loss_val, global_step)
        global_step += 1
        optimizer.zero_grad()
        loss_val.backward()
        optimizer.step()
def rollout_agent(agent, n_step=100):
    """Let `agent` drive on the 'lighthouse' track and record every step.

    Args:
        agent: Callable applied to a float tensor of shape (1, C, H, W) on
            `device`; the first three values of its first output row are used
            as steer, acceleration and brake (brake is applied when the third
            value exceeds 0.5).
        n_step: Number of agent-controlled steps to run and record.

    Returns:
        A list with one `(image, (steer, acceleration, brake))` tuple per
        step, where `image` is `render_data[0].image` after the step and the
        action values are read from `last_action[0]`.
        NOTE(review): presumably `last_action` echoes the action just applied,
        i.e. the agent's own output rather than an expert label -- confirm
        before using this as DAgger training data.
    """
    config = pystk.RaceConfig()
    config.track = 'lighthouse'
    # Unlike rollout(), the controller is not switched to AI_CONTROL here; the
    # actions passed to step() come from `agent`.
    k = pystk.Race(config)
    k.start()
    try:
        # Advance once before the loop so a frame is available for the first
        # prediction (presumably render_data is empty before any step).
        k.step()
        data = []
        for _ in range(n_step):
            x = torch.as_tensor(np.array(k.render_data[0].image))[None].permute(0,3,1,2).float()/255. - 0.5
            # Inference only: no autograd graph is needed. Use the `agent`
            # argument, not whatever global network happens to exist.
            with torch.no_grad():
                a = agent(x.to(device))[0]
            k.step(pystk.Action(steer=float(a[0]), acceleration=float(a[1]), brake=float(a[2])>0.5))
            la = k.last_action[0]
            data.append( (np.array(k.render_data[0].image), (la.steer, la.acceleration, la.brake)) )
    finally:
        # Always stop the race and drop the reference, even when a step fails.
        k.stop()
        del k
    return data
# Let the trained network drive for 100 steps and print the action stored
# with the last sample.
data = rollout_agent(actor, 100)
print(data[-1][1])
# Show the frames recorded at indices 10, 30, 50, 70 and 90, one figure each,
# with the axes hidden.
for i in range(5):
    figure()
    imshow(data[10 + 20 * i][0])
    axis('off')