# Taken from the chapter on reinforcement learning

# Note that this calculation is vectorized: 'theta' and 'e' are matrices
linearUpdate <- function(lambda, theta, u, e, w) {
  return(lambda * theta + e %*% u + w)
}

# Calculate the new average by multiplying the old average by (n - 1),
# adding the new number and dividing the total by n
calculateNewMean <- function(theta, u, n) {
  return((theta * (n - 1) + u) / n)
}

# Calculates collisions between agents on a wrap-around (toroidal) world
# w = width, h = height, r = collision radius, x1 ... y2 = coordinates
collision <- function(w, h, r, x1, x2, y1, y2) {
  # No expensive sqrt() is needed: we square both sides of the
  # inequality and compare squared distances instead.
  # pmin() gives the element-wise minimum of two vectors (rather than
  # the overall minimum), so for each agent pair the shorter of the
  # direct and the wrap-around distance is used.
  round(
    pmin(abs(x1 - x2), w - abs(x1 - x2))^2 +
    pmin(abs(y1 - y2), h - abs(y1 - y2))^2,
    4
  ) <= r^2
}

# Calculates new coordinates; 'cx' and 'cy' are vectors, so this function
# is called once per tick and updates all agents at the same time
move <- function(w, h, cx, cy, r, a) {
  # The new coordinate is a point on a circle of radius 'r' in
  # direction 'a', offset from the old location
  x <- cx + r * cos(a)
  y <- cy + r * sin(a)
  # If the coordinates are out of bounds, wrap them around
  x <- ifelse(x >= w, x - w, x)
  x <- ifelse(x < 0, w + x, x)
  y <- ifelse(y >= h, y - h, y)
  y <- ifelse(y < 0, h + y, y)
  return(cbind(x, y))
}

# Exploration term driven by the absolute TD error; algebraically this
# equals tanh(abs(alpha * TD.error) / (2 * sigma))
softmaxBoltzmann <- function(alpha, TD.error, sigma) {
  (1 - exp(-abs(alpha * TD.error) / sigma)) /
  (1 + exp(-abs(alpha * TD.error) / sigma))
}

# Secant and cosecant (reciprocals of cosine and sine)
sec <- function(z) { 1 / cos(z) }
csc <- function(z) { 1 / sin(z) }
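
# ------------------------------------------------------------------
# Usage sketch (not from the chapter): one hypothetical tick for two
# agents on a 10 x 10 toroidal world. All concrete values below
# (world size, step length, headings, collision radius) are assumed
# purely for illustration.
w <- 10; h <- 10           # world width and height
cx <- c(1.0, 9.5)          # current x coordinates of the two agents
cy <- c(1.0, 0.5)          # current y coordinates
step <- 0.5                # distance travelled per tick
a <- c(0, pi)              # headings in radians

# Move both agents at once; 'pos' is a matrix with columns "x" and "y"
pos <- move(w, h, cx, cy, step, a)

# Agent 2 sits near the right edge, so the shortest path to agent 1
# crosses the boundary: the wrap-around x-distance is 2.5, not 7.5.
# The squared distance is 2.5^2 + 0.5^2 = 6.5 <= 3^2, so this is TRUE.
collision(w, h, 3, pos[1, "x"], pos[2, "x"], pos[1, "y"], pos[2, "y"])

# Incremental mean: a running mean of 2 over 2 samples plus a new
# observation of 5 gives (2 * 2 + 5) / 3 = 3 as the mean of 3 samples
calculateNewMean(2, 5, 3)
# ------------------------------------------------------------------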