# Build two random nine-player line-ups (home and away) from the 2018 season
# of the Lahman baseball dataset and print each player's name together with
# a batting average expressed as a whole-number percentage.

# Step 1 - Install the Lahman dataset, but only when it is not already
# installed, so re-running the script does not re-download the package.
if (!requireNamespace("Lahman", quietly = TRUE)) {
  install.packages("Lahman")
}

# Step 2 - load the library
library(Lahman)

# Step 3 - ensure that the year is at least 2018
latestYear <- max(Batting$yearID)
print(latestYear)
stopifnot(latestYear >= 2018)

# Step 4 - View the Batting table (the viewer is only opened in an
# interactive session)
if (interactive()) {
  View(Batting)
}

# Step 5 - create the filter for 2018 (Batting and Pitching)
batting2018 <- Batting[Batting$yearID == 2018, ]
pitching2018 <- Pitching[Pitching$yearID == 2018, ]

# Step 5a - View the new batting2018 table
if (interactive()) {
  View(batting2018)
}
# feel free to do this with pitching2018 too

# Step 6 - Add the batting average (BA) column: hits per at-bat, scaled to
# a percentage and rounded to a whole number. Rows with zero at-bats yield
# NaN here and are removed in step 7.
batting2018$BA <- round(batting2018$H / batting2018$AB * 100, 0)

# Step 7 - Filter out the NaN (see blog text)
batting2018 <- batting2018[batting2018$AB > 0, ]

# Step 8 - filter out players who have not played enough games
# (see blog text)
# NOTE(review): the filter is applied per row of Batting; if a player has
# more than one 2018 row (e.g. several stints), each row is judged on its
# own - confirm this is the intended behaviour.
batting2018 <- batting2018[batting2018$G > 75, ]

# Step 9 - select the players at random and split into two teams.
# seq_len() is used instead of 1:nrow() so an empty table cannot produce
# the bogus index vector c(1, 0); sample() then fails loudly if fewer than
# 18 rows remain.
playerPicks <- sample(seq_len(nrow(batting2018)), 18, replace = FALSE)
homeTeam <- batting2018[playerPicks[1:9], ]
awayTeam <- batting2018[playerPicks[10:18], ]

# Step 10 - add player name to home and away teams by looking each
# playerID up in the People table
homeIndex <- match(homeTeam$playerID, People$playerID)
awayIndex <- match(awayTeam$playerID, People$playerID)
homeTeam$playerName <- paste(
  People[homeIndex, "nameFirst"],
  People[homeIndex, "nameLast"]
)
awayTeam$playerName <- paste(
  People[awayIndex, "nameFirst"],
  People[awayIndex, "nameLast"]
)

# Step 11 - print out in the format needed for game (see blog text).
# Explicit print() keeps the output visible when the file is run through
# source(), where top-level expressions are not auto-printed.
print(homeTeam[, c("playerName", "BA")])
print(awayTeam[, c("playerName", "BA")])