torus.width  <- 25
torus.height <- 25
agent.count  <- 5

# Distance an agent moves per tick
agent.speed.per.tick <- 1

# Default payoffs (payoff.move is overwritten by the parameter sweep below)
payoff.collide <- 0
payoff.move    <- 1
collision.radius <- 1

# Eight possible directions, evenly spaced around the circle
n <- (2 * pi) / 8
A <- seq(0, 2 * pi - n, n)

# Give all agents a uniform random starting position on the torus
agent.coordinates <- matrix(
  data = c(runif(agent.count, 0, torus.width),
           runif(agent.count, 0, torus.height)),
  ncol = agent.count, nrow = 2,
  dimnames = list(c("x", "y"), NULL),
  byrow = TRUE)

# Mean payoffs (propensities) for all directions and agents
theta <- matrix(data = 0, ncol = agent.count, nrow = length(A))

# Number of times each action has been chosen by each agent
A.count <- matrix(data = 0, ncol = agent.count, nrow = length(A))

TD.error <- matrix(data = 0, ncol = agent.count, nrow = length(A))

# Epsilon-greedy exploration rate for each agent
# (overwritten by the parameter sweep below)
epsilon <- rep(1, agent.count)

# Positive constant called the inverse sensitivity
sigma <- 1

# Determines the influence of the selected action on the exploration rate
delta <- 1 / length(A)

# Helper functions move(), collision() and softmaxBoltzmann(); see Functions.R
# (hedged sketches of these helpers are given at the bottom of this script)
source("Functions.R")

# For now, repeat the process 100 times
max.ticks <- 100
plotData <- matrix(data = NA, ncol = length(A), nrow = max.ticks)

# We use nnet's which.is.max so that ties are broken at random,
# which is more elegant than always taking the first maximum
library(nnet)

for (payoff.move in seq(10, 100, by = 10)) {
  for (epsilon.start in seq(0.1, 0.5, by = 0.1)) {
    print(paste(payoff.move, epsilon.start))

    # Reset the learning state for every parameter combination,
    # otherwise results leak from one combination into the next
    theta    <- matrix(data = 0, ncol = agent.count, nrow = length(A))
    A.count  <- matrix(data = 0, ncol = agent.count, nrow = length(A))
    TD.error <- matrix(data = 0, ncol = agent.count, nrow = length(A))
    epsilon  <- rep(epsilon.start, agent.count)

    for (tick in 1:max.ticks) {
      do.explore <- runif(agent.count, 0, 1) <= epsilon

      # Maybe we can make this prettier, but we need the agent index into theta
      directions <- sapply(1:agent.count, function(agent.id) {
        if (do.explore[agent.id]) {
          # Choose a uniform random direction
          return(sample(A, 1))
        } else {
          # Choose the direction with the highest propensity, breaking ties at random
          return(A[which.is.max(theta[, agent.id])])
        }
      })

      # New coordinates for all agents if they were to move
      coords.new <- t(move(torus.width, torus.height,
                           agent.coordinates["x", ], agent.coordinates["y", ],
                           agent.speed.per.tick, directions))

      collisions <- sapply(1:agent.count, function(agent.id) {
        # Note that we use the entire column for x2 and y2, so we get a matrix
        # of the correct size; the self-collisions are fixed afterwards
        collision(torus.width, torus.height, collision.radius,
                  x1 = coords.new["x", agent.id], x2 = agent.coordinates["x", ],
                  y1 = coords.new["y", agent.id], y2 = agent.coordinates["y", ])
      })

      # Ignore collisions of an agent with itself
      diag(collisions) <- FALSE

      # Reduce the matrix to a logical vector: did each agent collide with anyone?
      collisions <- apply(collisions, 2, any)

      direction.index <- cbind(match(directions, A), 1:length(directions))

      # Indicator (one-hot) matrix with a 1 for the direction chosen by each agent
      e <- matrix(data = 0, nrow = length(A), ncol = agent.count)
      e[direction.index] <- 1

      # Diagonal payoff matrix: payoff.collide if the agent collided, payoff.move otherwise
      u <- diag(ifelse(collisions, payoff.collide, payoff.move))

      # Reward matrix (zero for directions that were not chosen)
      r <- e %*% u

      # Temporal-difference error
      TD.error <- r - theta

      # Number of preceding selections of each action
      A.count <- A.count + e

      # With a stationary reward distribution, a sample-average style step size can be used
      alpha <- 1 / (1 + A.count)
      theta <- theta + alpha * TD.error

      # Adapt each agent's exploration rate from the value difference of its chosen action
      epsilon <- delta * softmaxBoltzmann(alpha[direction.index],
                                          TD.error[direction.index],
                                          sigma) +
        (1 - delta) * epsilon

      plotData[tick, ] <- rowMeans(theta)
    }

    r.sums <- rowSums(theta)
    ratios <- r.sums / sum(r.sums)
    # Directions that have accumulated at least 90% of the total propensity
    dir.index <- which(ratios >= 0.9)

    if (length(dir.index) > 0) {
      write.csv(theta,
                file = paste(epsilon.start, payoff.move, "theta.csv", sep = "-"),
                quote = FALSE, row.names = FALSE)
      write.csv(plotData,
                file = paste(epsilon.start, payoff.move, "plotData.csv", sep = "-"),
                quote = FALSE, row.names = FALSE)
      matplot(plotData, type = "l",
              xlab = "Time", ylab = "Mean propensity",
              main = "Linear Updating Model")
    }
  }
}
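
# ---------------------------------------------------------------------------
# Functions.R is not included here. The sketches below are only an illustration
# of what move() and collision() presumably look like, inferred from how they
# are called above (toroidal wrap-around movement and a wrap-around distance
# check); the actual implementations in Functions.R may differ.

# move: step every agent in its chosen direction and wrap around the torus edges.
# Returns an agent.count x 2 matrix with columns "x" and "y", so that t(move(...))
# can be indexed as coords.new["x", agent.id] above.
move <- function(torus.width, torus.height, x, y, speed, direction) {
  cbind(x = (x + speed * cos(direction)) %% torus.width,
        y = (y + speed * sin(direction)) %% torus.height)
}

# collision: TRUE wherever the point (x1, y1) lies within `radius` of (x2, y2),
# using the shortest wrap-around distance on the torus. x2 and y2 may be vectors,
# which is what produces the agent.count x agent.count matrix above.
collision <- function(torus.width, torus.height, radius, x1, x2, y1, y2) {
  dx <- pmin(abs(x1 - x2), torus.width  - abs(x1 - x2))
  dy <- pmin(abs(y1 - y2), torus.height - abs(y1 - y2))
  sqrt(dx^2 + dy^2) <= radius
}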
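
# softmaxBoltzmann is likewise not shown. The adaptive-epsilon update above
# (epsilon <- delta * f(...) + (1 - delta) * epsilon, with sigma described as an
# "inverse sensitivity" and delta = 1 / length(A)) appears to match
# value-difference based exploration (VDBE, Tokic 2010), so a plausible sketch
# is the Boltzmann value-difference term of that method; treat this as an
# assumption, not the author's actual code.
softmaxBoltzmann <- function(alpha, TD.error, sigma) {
  # Close to 1 for large |alpha * TD.error| (explore more),
  # close to 0 for small |alpha * TD.error| (exploit more)
  x <- exp(-abs(alpha * TD.error) / sigma)
  (1 - x) / (1 + x)
}

# With sigma = 1, a weighted TD error of magnitude 1 gives roughly 0.46, so
# large early errors push epsilon up quickly, while epsilon decays as the
# propensity estimates stabilise.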