for loops

Anatomy of a for loop

for (var in seq) { # start of for loop

# body of for loop 

} # end of for loop

var is a counter variable that will hold the current value of the loop
seq is an integer vector (or a vector of character strings) whose elements are the values that var takes, one per pass through the loop, from the first element to the last

It is traditional in the statistics literature to use variables i,j,k to indicate counters. e.g.

\[A = \sum_{i=1}^S n_i\]

You can also use t for time, but remember that t() is also an R function that transposes a matrix. R is smart enough to recognize the difference, but it could cause problems for others.

There are many ways to indicate the sequence, and the length of our loops will almost always be related to the size of a vector that we are working with.

We will only illustrate one way, because it is universal and applies to all for loops.

# Run the body five times. The counter i takes the values 1, 2, ..., 5 in
# turn, although this particular body never uses i.
for (i in 1:5) {
  cat("stuck in a loop","\n")
  cat(3 + 2,"\n")
  # runif(1) draws a new random number on every pass, so this is the only
  # line of output that differs between iterations (and between runs)
  cat(runif(1),"\n")
}
## stuck in a loop 
## 5 
## 0.5616766 
## stuck in a loop 
## 5 
## 0.4326963 
## stuck in a loop 
## 5 
## 0.604849 
## stuck in a loop 
## 5 
## 0.7496482 
## stuck in a loop 
## 5 
## 0.2151667
# The counter still exists after the loop ends and holds the last value
# it was assigned
print(i)
## [1] 5

Instead of a hard-coded range whose counter is never used in the body, we usually want a counter variable that maps to the position of each element in a vector:

my_dogs <- c("chow","akita","malamute","husky","samoyed")
# i steps through the positions 1, 2, ..., length(my_dogs);
# my_dogs[i] is the element stored at position i
for (i in 1:length(my_dogs)){
  cat("i =",i,"my_dogs[i] =" ,my_dogs[i],"\n")
}
## i = 1 my_dogs[i] = chow 
## i = 2 my_dogs[i] = akita 
## i = 3 my_dogs[i] = malamute 
## i = 4 my_dogs[i] = husky 
## i = 5 my_dogs[i] = samoyed

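This is the typical way we make a loop. One potential hazard arises if the vector we are working with is empty: its length is 0, so 1:length(x) becomes 1:0, which is the two-element sequence 1, 0 rather than an empty sequence, and the loop body runs twice instead of not at all: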

my_bad_dogs <- NULL
# Deliberately hazardous: length(my_bad_dogs) is 0, so the sequence is 1:0,
# which counts DOWN and yields c(1, 0). The body therefore runs twice
# (i = 1, then i = 0) even though there is nothing to loop over, and
# my_bad_dogs[i] prints as nothing both times.
for (i in 1:length(my_bad_dogs)){
  cat("i =",i,"my_bad_dogs[i] =" ,my_bad_dogs[i],"\n")
}
## i = 1 my_bad_dogs[i] = 
## i = 0 my_bad_dogs[i] =

So, a safer way is to use the seq_along function, which generates one index for each element of its argument:

# seq_along(my_dogs) produces 1, 2, ..., length(my_dogs), so for a
# non-empty vector the loop visits exactly the same positions as
# 1:length(my_dogs)
for (i in seq_along(my_dogs)){
  cat("i =",i,"my_dogs[i] =" ,my_dogs[i],"\n")
}
## i = 1 my_dogs[i] = chow 
## i = 2 my_dogs[i] = akita 
## i = 3 my_dogs[i] = malamute 
## i = 4 my_dogs[i] = husky 
## i = 5 my_dogs[i] = samoyed

But notice now what happens when the vector is empty:

# This time we correctly skip my_bad_dogs and do not make the loop:
# seq_along() of an empty object is a zero-length sequence, so the body
# runs zero times and nothing is printed
for (i in seq_along(my_bad_dogs)){
  cat("i =",i,"my_bad_dogs[i] =" ,my_bad_dogs[i],"\n")
}

Alternatively, we may have a constant that we use to define the length of the vector; in that case seq_len generates the indices 1 up to that constant:

# Number of iterations, held in a constant; here it equals the number of
# elements in my_dogs
zz <- 5
# seq_len(zz) produces 1, 2, ..., zz (and a zero-length sequence when zz is 0)
for (i in seq_len(zz)){
  cat("i =",i,"my_dogs[i] =" ,my_dogs[i],"\n")
}
## i = 1 my_dogs[i] = chow 
## i = 2 my_dogs[i] = akita 
## i = 3 my_dogs[i] = malamute 
## i = 4 my_dogs[i] = husky 
## i = 5 my_dogs[i] = samoyed

Tip #1: Don’t do things in the loop if you do not need to!

# Shown on purpose as the thing to avoid: toupper() works on a whole
# character vector at once, so converting one element per iteration is
# work that does not need to be inside the loop.
# The indices come from seq_along() so that an empty my_dogs would skip the
# loop instead of iterating over c(1, 0).
for (i in seq_along(my_dogs)) {
  my_dogs[i] <- toupper(my_dogs[i])
  cat("i =",i,"my_dogs[i] =" ,my_dogs[i],"\n")
}
## i = 1 my_dogs[i] = CHOW 
## i = 2 my_dogs[i] = AKITA 
## i = 3 my_dogs[i] = MALAMUTE 
## i = 4 my_dogs[i] = HUSKY 
## i = 5 my_dogs[i] = SAMOYED
# The same kind of case conversion done once, on the whole vector and
# outside any loop (this also restores the lower-case names)
my_dogs <- tolower(my_dogs)

Tip #2: Do not change object dimensions (cbind,rbind,c,list) in the loop!

# Deliberate anti-pattern: start with a length-1 vector and append one
# element per iteration.
my_dat <- runif(1)
for (i in 2:10) {
  temp <- runif(1) 
  # c() builds a new, longer vector on every pass instead of filling a slot
  # in an existing one. Creating the full-length vector before the loop
  # (e.g. with numeric(10)) and assigning to my_dat[i] avoids the regrowth.
  my_dat <- c(my_dat,temp) # do not change vector size in the loop!
  cat("loop number =",i,"vector element =", my_dat[i],"\n")
}
## loop number = 2 vector element = 0.2174434 
## loop number = 3 vector element = 0.563367 
## loop number = 4 vector element = 0.8625764 
## loop number = 5 vector element = 0.2639413 
## loop number = 6 vector element = 0.6354729 
## loop number = 7 vector element = 0.6397322 
## loop number = 8 vector element = 0.1105476 
## loop number = 9 vector element = 0.5726199 
## loop number = 10 vector element = 0.993259
# Element 1 was drawn before the loop; elements 2..10 were appended inside it
print(my_dat)
##  [1] 0.3814199 0.2174434 0.5633670 0.8625764 0.2639413 0.6354729 0.6397322
##  [8] 0.1105476 0.5726199 0.9932590

Tip #3: Do not write a loop if you can vectorize an operation

# Loop version: each pass overwrites my_dat[i] with my_dat[i] + my_dat[i]^2,
# one element at a time
my_dat <- 1:10
for (i in seq_along(my_dat)) {
  my_dat[i] <-  my_dat[i] + my_dat[i]^2
  cat("loop number =",i,"vector element =", my_dat[i],"\n")
}
## loop number = 1 vector element = 2 
## loop number = 2 vector element = 6 
## loop number = 3 vector element = 12 
## loop number = 4 vector element = 20 
## loop number = 5 vector element = 30 
## loop number = 6 vector element = 42 
## loop number = 7 vector element = 56 
## loop number = 8 vector element = 72 
## loop number = 9 vector element = 90 
## loop number = 10 vector element = 110
# No loop is needed here! Arithmetic operators act element-wise on whole
# vectors, so one expression gives the same ten values.
z <- 1:10
z <- z + z^2
print(z)
##  [1]   2   6  12  20  30  42  56  72  90 110

Tip #4: Always be alert to the distinction between the counter variable i and the vector element z[i]

z <- c(10,2,4)
# i is the position (1, 2, 3); z[i] is the value stored at that position
# (10, 2, 4). The two are different things.
for (i in seq_along(z)) {
  cat("i =",i,"z[i] = ",z[i],"\n")
}
## i = 1 z[i] =  10 
## i = 2 z[i] =  2 
## i = 3 z[i] =  4
# What is the value of i at this point? It is the last position visited (3),
# not the last element of z (4).
print(i)
## [1] 3

Tip #5: Use next to skip certain elements in the loop

z <- 1:20
# What if we want to work with only the odd-numbered elements?

# Method 1: visit every position and bail out of the even ones.
for (i in seq_along(z)) {
  # even i: skip the rest of the body and go straight to the next iteration
  if(i %% 2==0) next
  # prints the counter; because z is 1:20 the counter and z[i] happen to be
  # the same number here
  print(i)
}
## [1] 1
## [1] 3
## [1] 5
## [1] 7
## [1] 9
## [1] 11
## [1] 13
## [1] 15
## [1] 17
## [1] 19
# Another method, probably faster (why?)
# Method 2: pick out the odd values once, before the loop, then loop over
# only those. This loop makes 10 passes instead of 20 and needs no test
# inside the body.
z <- 1:20
zsub <- z[z %% 2!=0] # contrast with logical expression in previous if statement!
# z itself is unchanged and still has 20 elements; only zsub is shorter
length(z)
## [1] 20
# Here i is the position within zsub (1..10), not the odd value itself:
# compare i with zsub[i] in the output
for (i in seq_along(zsub)) {
  cat("i = ",i,"zsub[i] = ",zsub[i],"\n")
}
## i =  1 zsub[i] =  1 
## i =  2 zsub[i] =  3 
## i =  3 zsub[i] =  5 
## i =  4 zsub[i] =  7 
## i =  5 zsub[i] =  9 
## i =  6 zsub[i] =  11 
## i =  7 zsub[i] =  13 
## i =  8 zsub[i] =  15 
## i =  9 zsub[i] =  17 
## i =  10 zsub[i] =  19