# ---- World, agents and learning state -------------------------------------
# Sets up the torus dimensions, the agents' starting positions and their
# initial propensities, then loads the helper functions used by the
# simulation loop further down in this script.

# Size of the torus the agents move on
torus.width <- 20
torus.height <- 10
agent.count <- 10

# Distance agents move per tick (second or whatever)
agent.speed.per.tick <- 1

# Payoff when a move would collide / when it succeeds.
# payoff.move is only a default here: the parameter sweep below loops over
# its own payoff.move values.
payoff.collide <- 0
payoff.move <- 10
collision.radius <- 1

# Possible directions to take, in radians: k equally spaced headings
# starting at 0 with step n = 2*pi / k.
k <- 8
n <- (2 * pi) / k
# length.out pins the number of headings to exactly k instead of relying on
# a floating-point end point (2*pi - n) to stop the sequence.
A <- seq(0, by = n, length.out = k)

# We give all agents some random uniform starting position along the torus.
# Row "x" holds the x coordinates, row "y" the y coordinates; one column per
# agent. The x values are drawn before the y values.
agent.coordinates <- matrix(
  data = c(
    runif(agent.count, 0, torus.width),
    runif(agent.count, 0, torus.height)
  ),
  nrow = 2,
  ncol = agent.count,
  byrow = TRUE,
  dimnames = list(c("x", "y"), NULL)
)

# Agent propensities: one row per direction in A, one column per agent,
# all starting at 1.
theta <- matrix(data = 1, nrow = length(A), ncol = agent.count)

# Look in the file if you're interested
source("Functions.R")

# Epsilon-greedy approach
# The best lever is selected for a proportion 1 - epsilon of the trials, and
# a lever is selected at random (with uniform probability) for a proportion
# epsilon. A typical parameter value might be epsilon = 0.1, but this can
# vary widely depending on circumstances and predilections.
# (Default only: the parameter sweep below loops over its own epsilon values.)
epsilon <- 0.1

# We use nnet's which.is.max so we can break ties at random.
# Which I think is more elegant
# library() (rather than require()) stops immediately if nnet is missing,
# instead of failing later at the first which.is.max() call.
library(nnet)

# ---- Parameter sweep -------------------------------------------------------
# For every (payoff.move, epsilon) combination, run max.ticks ticks of the
# epsilon-greedy simulation. When at least one direction ends up holding
# >= 30% of the summed propensities, write theta and the per-tick mean
# propensities to CSV files and plot the latter.
#
# move(), collision() and calculateNewMean() are not defined in this part of
# the script (presumably they come from Functions.R -- verify).

# Number of ticks simulated per parameter combination
max.ticks <- 100

for (payoff.move in seq(10, 100, by = 10)) {
  for (epsilon in seq(0.1, 0.5, by = 0.1)) {
    print(paste(payoff.move, epsilon))

    # NOTE(review): theta and agent.coordinates are not re-initialised here,
    # so they carry over from one parameter combination to the next --
    # confirm this is intended.

    # Row t holds the mean propensity per direction (averaged over agents)
    # at the end of tick t.
    plotData <- matrix(data = NA, ncol = length(A), nrow = max.ticks)

    for (tick in seq_len(max.ticks)) {
      for (agent.id in seq_len(agent.count)) {
        # We use the Epsilon-greedy method
        # So we randomly decide for each agent whether it will explore or not
        do.explore <- runif(1, 0, 1) <= epsilon

        if (do.explore) {
          # Choose a uniform random direction
          directions <- sample(A, 1)
        } else {
          # Choose the direction with the highest propensity,
          # break ties at random
          directions <- A[which.is.max(theta[, agent.id])]
        }

        # Candidate position of this agent if it would move in the chosen
        # direction; the transposed result is indexed by rows "x" and "y".
        coords.new <- t(move(
          torus.width, torus.height,
          agent.coordinates["x", agent.id],
          agent.coordinates["y", agent.id],
          agent.speed.per.tick, directions
        ))

        # Compare the candidate position against the current positions of
        # all agents (presumably one logical per agent -- verify against
        # collision()).
        collisions <- collision(
          torus.width, torus.height, collision.radius,
          x1 = coords.new["x", 1], x2 = agent.coordinates["x", ],
          y1 = coords.new["y", 1], y2 = agent.coordinates["y", ]
        )
        # An agent cannot collide with itself
        collisions[agent.id] <- FALSE
        has.collision <- any(collisions)

        # One-hot column vector marking the chosen direction
        e <- matrix(data = 0, nrow = length(A), ncol = 1)
        e[match(directions, A)] <- 1

        # Payoff for this move
        if (has.collision) {
          u <- payoff.collide
        } else {
          u <- payoff.move
        }

        theta[, agent.id] <- calculateNewMean(theta[, agent.id], e * u, tick)

        # Only actually move when the target position is free
        if (!has.collision) {
          agent.coordinates[, agent.id] <- coords.new
        }
      }

      # Recorded once per tick, after every agent has been updated. The
      # original rewrote this row after each agent; only the last write
      # survived, so the stored values are the same.
      plotData[tick, ] <- rowMeans(theta)
    }

    # Share of the total propensity mass held by each direction
    r.sums <- rowSums(theta)
    ratios <- r.sums / sum(r.sums)
    dir.index <- which(ratios >= 0.3)

    # Only keep the runs in which some direction clearly dominates
    if (length(dir.index) > 0) {
      write.csv(
        theta,
        file = paste(epsilon, payoff.move, "theta.csv", sep = "-"),
        quote = FALSE, row.names = FALSE
      )
      write.csv(
        plotData,
        file = paste(epsilon, payoff.move, "plotData.csv", sep = "-"),
        quote = FALSE, row.names = FALSE
      )
      matplot(
        plotData,
        type = "l", xlab = "Time", ylab = "Mean propensity",
        main = "Epsilon-greedy Search"
      )
    }
  }
}