torus.width  <- 25
torus.height <- 25
agent.count  <- 5

# Distance an agent moves per tick
agent.speed.per.tick <- 1

# Default payoffs (payoff.move is overwritten by the parameter sweep below)
payoff.collide <- 0
payoff.move    <- 1
collision.radius <- 1

# Eight possible directions, evenly spaced around the circle
n <- (2 * pi) / 8
A <- seq(0, 2 * pi - n, n)

# Give all agents a uniform random starting position on the torus
agent.coordinates <- matrix(
  data = c(runif(agent.count, 0, torus.width),
           runif(agent.count, 0, torus.height)),
  ncol = agent.count, nrow = 2,
  dimnames = list(c("x", "y"), NULL),
  byrow = TRUE)

# Mean payoffs (propensities) for all directions and agents
theta <- matrix(data = 0, ncol = agent.count, nrow = length(A))

# Number of times each action has been chosen by each agent
A.count <- matrix(data = 0, ncol = agent.count, nrow = length(A))

TD.error <- matrix(data = 0, ncol = agent.count, nrow = length(A))

# Epsilon-greedy exploration rate for each agent
# (overwritten by the parameter sweep below)
epsilon <- rep(1, agent.count)

# Positive constant called the inverse sensitivity
sigma <- 1

# Determines the influence of the selected action on the exploration rate
delta <- 1 / length(A)

# Helper functions move(), collision() and softmaxBoltzmann(); see Functions.R
# (hedged sketches of these helpers are given at the bottom of this script)
source("Functions.R")

# For now, repeat the process 100 times
max.ticks <- 100
plotData <- matrix(data = NA, ncol = length(A), nrow = max.ticks)

# We use nnet's which.is.max so that ties are broken at random,
# which is more elegant than always taking the first maximum
library(nnet)

for (payoff.move in seq(10, 100, by = 10)) {
  for (epsilon.start in seq(0.1, 0.5, by = 0.1)) {
    print(paste(payoff.move, epsilon.start))

    # Reset the learning state for every parameter combination,
    # otherwise results leak from one combination into the next
    theta    <- matrix(data = 0, ncol = agent.count, nrow = length(A))
    A.count  <- matrix(data = 0, ncol = agent.count, nrow = length(A))
    TD.error <- matrix(data = 0, ncol = agent.count, nrow = length(A))
    epsilon  <- rep(epsilon.start, agent.count)

    for (tick in 1:max.ticks) {
      do.explore <- runif(agent.count, 0, 1) <= epsilon

      # Maybe we can make this prettier, but we need the agent index into theta
      directions <- sapply(1:agent.count, function(agent.id) {
        if (do.explore[agent.id]) {
          # Choose a uniform random direction
          return(sample(A, 1))
        } else {
          # Choose the direction with the highest propensity, breaking ties at random
          return(A[which.is.max(theta[, agent.id])])
        }
      })

      # New coordinates for all agents if they were to move
      coords.new <- t(move(torus.width, torus.height,
                           agent.coordinates["x", ], agent.coordinates["y", ],
                           agent.speed.per.tick, directions))

      collisions <- sapply(1:agent.count, function(agent.id) {
        # Note that we use the entire column for x2 and y2, so we get a matrix
        # of the correct size; the self-collisions are fixed afterwards
        collision(torus.width, torus.height, collision.radius,
                  x1 = coords.new["x", agent.id], x2 = agent.coordinates["x", ],
                  y1 = coords.new["y", agent.id], y2 = agent.coordinates["y", ])
      })

      # Ignore collisions of an agent with itself
      diag(collisions) <- FALSE

      # Reduce the matrix to a logical vector: did each agent collide with anyone?
      collisions <- apply(collisions, 2, any)

      direction.index <- cbind(match(directions, A), 1:length(directions))

      # Indicator (one-hot) matrix with a 1 for the direction chosen by each agent
      e <- matrix(data = 0, nrow = length(A), ncol = agent.count)
      e[direction.index] <- 1

      # Diagonal payoff matrix: payoff.collide if the agent collided, payoff.move otherwise
      u <- diag(ifelse(collisions, payoff.collide, payoff.move))

      # Reward matrix (zero for directions that were not chosen)
      r <- e %*% u

      # Temporal-difference error
      TD.error <- r - theta

      # Number of preceding selections of each action
      A.count <- A.count + e

      # With a stationary reward distribution, a sample-average style step size can be used
      alpha <- 1 / (1 + A.count)
      theta <- theta + alpha * TD.error

      # Adapt each agent's exploration rate from the value difference of its chosen action
      epsilon <- delta * softmaxBoltzmann(alpha[direction.index],
                                          TD.error[direction.index],
                                          sigma) +
        (1 - delta) * epsilon

      plotData[tick, ] <- rowMeans(theta)
    }

    r.sums <- rowSums(theta)
    ratios <- r.sums / sum(r.sums)
    # Directions that have accumulated at least 90% of the total propensity
    dir.index <- which(ratios >= 0.9)

    if (length(dir.index) > 0) {
      write.csv(theta,
                file = paste(epsilon.start, payoff.move, "theta.csv", sep = "-"),
                quote = FALSE, row.names = FALSE)
      write.csv(plotData,
                file = paste(epsilon.start, payoff.move, "plotData.csv", sep = "-"),
                quote = FALSE, row.names = FALSE)
      matplot(plotData, type = "l",
              xlab = "Time", ylab = "Mean propensity",
              main = "Linear Updating Model")
    }
  }
}
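
# ---------------------------------------------------------------------------
# Functions.R is not included here. The sketches below are only an illustration
# of what move() and collision() presumably look like, inferred from how they
# are called above (toroidal wrap-around movement and a wrap-around distance
# check); the actual implementations in Functions.R may differ.

# move: step every agent in its chosen direction and wrap around the torus edges.
# Returns an agent.count x 2 matrix with columns "x" and "y", so that t(move(...))
# can be indexed as coords.new["x", agent.id] above.
move <- function(torus.width, torus.height, x, y, speed, direction) {
  cbind(x = (x + speed * cos(direction)) %% torus.width,
        y = (y + speed * sin(direction)) %% torus.height)
}

# collision: TRUE wherever the point (x1, y1) lies within `radius` of (x2, y2),
# using the shortest wrap-around distance on the torus. x2 and y2 may be vectors,
# which is what produces the agent.count x agent.count matrix above.
collision <- function(torus.width, torus.height, radius, x1, x2, y1, y2) {
  dx <- pmin(abs(x1 - x2), torus.width  - abs(x1 - x2))
  dy <- pmin(abs(y1 - y2), torus.height - abs(y1 - y2))
  sqrt(dx^2 + dy^2) <= radius
}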
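
# softmaxBoltzmann is likewise not shown. The adaptive-epsilon update above
# (epsilon <- delta * f(...) + (1 - delta) * epsilon, with sigma described as an
# "inverse sensitivity" and delta = 1 / length(A)) appears to match
# value-difference based exploration (VDBE, Tokic 2010), so a plausible sketch
# is the Boltzmann value-difference term of that method; treat this as an
# assumption, not the author's actual code.
softmaxBoltzmann <- function(alpha, TD.error, sigma) {
  # Close to 1 for large |alpha * TD.error| (explore more),
  # close to 0 for small |alpha * TD.error| (exploit more)
  x <- exp(-abs(alpha * TD.error) / sigma)
  (1 - x) / (1 + x)
}

# With sigma = 1, a weighted TD error of magnitude 1 gives roughly 0.46, so
# large early errors push epsilon up quickly, while epsilon decays as the
# propensity estimates stabilise.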