Functions in R

sum(3,2) # a "prefix" function
3 + 2 # an "operator", but it is actually a function
`+`(3,2) # the operator is an "infix" function

y <- 3
print(y)

`<-`(yy,3) # another "infix" function
print(yy)

# to see contents of a function, print it
print(read.table)

sd # shows the code
sd(c(3,2)) # call the function with parameters
# sd() # call function with default values for parameters

The Anatomy Of A User-Defined Function

functionName <- function(parX=defaultX,parY=defaultY,parZ=defaultZ) { 

# curly bracket open marks the start of the function body

# Body of the function goes here
# Lines of R code and annotations
# May also call functions
# May also create functions
# May also create local variables

return(z)  # returns from the function a single element (z could be a list)

# curly bracket close marks the end of the function body
} 

# prints the function body
functionName 

# calls the function with default values and returns object z
functionName() 

# calls the function with user-specified values for each paramater
functionName(parX=myMatrix,parY="Order",parZ=c(0.3,1.6,2,6))

Stylistic Conventions For Programming Functions

A Sample Function For Hardy-Weinberg Equilibrium

##################################################
# FUNCTION: HardyWeinberg
# input: an allele frequency p (0,1)
# output: p and the frequencies of the 3 genotypes AA, AB, BB
#------------------------------------------------- 
HardyWeinberg <- function(p=runif(1)) {
  q <- 1 - p
  fAA <- p^2
  fAB <- 2*p*q
  fBB <- q^2
 vecOut <- signif(c(p=p,AA=fAA,AB=fAB,BB=fBB),digits=3)
 return(vecOut)
}
##################################################
 HardyWeinberg()
 HardyWeinberg(p=0.5) # pass to the parameter the value p
 # print(p) # error because p does not exist in the global environment
 pp <- 0.6 # variable pp is stored in global environment
 HardyWeinberg(p=pp) # pass contents of variable pp to function parameter p
 print(pp) # variable pp is still stored in memory

Use Multiple return() Statements For Different Possible Return Values

##################################################
# FUNCTION: HardyWeinberg2
# input: an allele frequency p (0,1)
# output: p and the frequencies of the 3 genotypes AA, AB, BB
#------------------------------------------------- 
HardyWeinberg2<- function(p=runif(1)) {
  if (p > 1.0 | p < 0.0) {
    return("Function failure: p must be >= 0.0 and <= 1.0")
  }
  q <- 1 - p
  fAA <- p^2
  fAB <- 2*p*q
  fBB <- q^2
 vecOut <- signif(c(p=p,AA=fAA,AB=fAB,BB=fBB),digits=3)
 return(vecOut)
  }
##################################################
  HardyWeinberg2()
  HardyWeinberg2(1.1) # OK, print error to screen
  z <- HardyWeinberg2(1.1) # uggh no error printed
  print(z) # Error message was saved to variable z!

Use Stop For True Error Trapping

##################################################  
# FUNCTION: HardyWeinberg3
# input: an allele frequency p (0,1)
# output: p and the frequencies of the 3 genotypes AA, AB, BB
#-------------------------------------------------
HardyWeinberg3<- function(p=runif(1)) {
  if (p > 1.0 | p < 0.0) {
    stop("Function failure: p must be >= 0.0 and <= 1.0")
  }
  q <- 1 - p
  fAA <- p^2
  fAB <- 2*p*q
  fBB <- q^2
 vecOut <- signif(c(p=p,AA=fAA,AB=fAB,BB=fBB),digits=3)
 return(vecOut)
  }
##################################################  
  HardyWeinberg3()
#  z <- HardyWeinberg3(1.1) 

Scoping In Functions

myFunc <- function(a=3,b=4) {
  z <- a + b
  return(z)
}
myFunc()

myFuncBad <-function(a=3) {
  z <- a + b
  return(z)
}
myFuncBad() # crashes because it can't find b
b <- 100
myFuncBad() # OK now because b exists as a global variable

# But it is fine to create variables locally
myFuncOK <- function(a=3) {
  bb <- 100
  z <- a + bb
  return(z)
}

myFuncOK() # no problems now
print(bb) # but this variable is still hidden from the global environment

Simple regression function

##################################################
# FUNCTION: fitLinear 
# fits simple regression line
# inputs: numeric vector of predictor (x) and response (y)
# outputs: slope and p-value
#------------------------------------------------- 
fitLinear <- function(x=runif(20),y=runif(20)) {
  myMod <- lm(y~x) # fit the model
  myOut <- c(slope=summary(myMod)$coefficients[2,1],
             pValue=summary(myMod)$coefficients[2,4])
  plot(x=x,y=y) # quick and dirty plot to check output
  return(myOut)
}
##################################################
fitLinear()

Creating a more complex default value

##################################################
# FUNCTION: fitLinear2       
# fits simple regression line
# inputs: numeric vector of predictor (x) and response (y)
# outputs: slope and p-value
#------------------------------------------------- 
fitLinear2 <- function(p=NULL) {
  if(is.null(p)) {
     p <- list(x=runif(20),y=runif(20))
  }
  myMod <- lm(p$x~p$y) # fit the model
  myOut <- c(slope=summary(myMod)$coefficients[2,1],
             pValue=summary(myMod)$coefficients[2,4])
  plot(x=p$x,y=p$y) # quick and dirty plot to check output
  return(myOut)
}

##################################################
fitLinear2()
myPars <-list(x=1:10,y=runif(10))
fitLinear2(myPars)

Using do.call To Pass A List Of Parameters To A Function

z <- c(runif(99),NA)
mean(z) # oops, mean doesn't work if there is an NA
mean(x=z,na.rm=TRUE) # change the default value for na.rm
mean(x=z,na.rm=TRUE,trim=0.05) # check out the "trim" option in help
l <- list(x=z,na.rm=TRUE,trim=0.05) # bundle paramaters as a list
do.call(mean,l) # use the do.call function with the function name and the parameter list