Introduction to Loop Functions

Advantages of Scripting

Elements of the R scripts

Selecting a Working Directory by menu

Selecting a Working Directory by command

getwd()
## [1] "C:/Users/Jeon Lee/Documents/R/BICF_nanocourse/Intro_Looping_Function"
#datadir <- "BICF_nanocourse/Intro_Looping_Function"
#setwd(datadir)
list.files()
##  [1] "cool_plot.ps"                            
##  [2] "exp_plot.R"                              
##  [3] "Introduction_Looping_Functions.html"     
##  [4] "Introduction_Looping_Functions.Rmd"      
##  [5] "main_Introduction_Looping_Functions.html"
##  [6] "main_Introduction_Looping_Functions.Rmd" 
##  [7] "mpg_by_cyl.png"                          
##  [8] "mpg_mean_cyl.txt"                        
##  [9] "mtcars.csv"                              
## [10] "SetWorkingDirectory.PNG"

Reading data from a file

?read.table
## starting httpd help server ... done
getwd()
## [1] "C:/Users/Jeon Lee/Documents/R/BICF_nanocourse/Intro_Looping_Function"
sep.csv <- c(',') #for tab-delimited# sep.tab <- c('\t')
csv.file <- c("mtcars.csv")
tbl <- read.table(file=csv.file, sep=sep.csv, header=TRUE)
head(tbl)
##               model  mpg cyl disp  hp drat    wt  qsec vs am gear carb
## 1         Mazda RX4 21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
## 2     Mazda RX4 Wag 21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
## 3        Datsun 710 22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
## 4    Hornet 4 Drive 21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
## 5 Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
## 6           Valiant 18.1   6  225 105 2.76 3.460 20.22  1  0    3    1

Row and column names

tbl <- read.table(file=csv.file, sep=sep.csv, header=TRUE, row.names=1)
head(tbl)
##                    mpg cyl disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
## Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
## Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
## Valiant           18.1   6  225 105 2.76 3.460 20.22  1  0    3    1
row.names(tbl)
##  [1] "Mazda RX4"           "Mazda RX4 Wag"       "Datsun 710"         
##  [4] "Hornet 4 Drive"      "Hornet Sportabout"   "Valiant"            
##  [7] "Duster 360"          "Merc 240D"           "Merc 230"           
## [10] "Merc 280"            "Merc 280C"           "Merc 450SE"         
## [13] "Merc 450SL"          "Merc 450SLC"         "Cadillac Fleetwood" 
## [16] "Lincoln Continental" "Chrysler Imperial"   "Fiat 128"           
## [19] "Honda Civic"         "Toyota Corolla"      "Toyota Corona"      
## [22] "Dodge Challenger"    "AMC Javelin"         "Camaro Z28"         
## [25] "Pontiac Firebird"    "Fiat X1-9"           "Porsche 914-2"      
## [28] "Lotus Europa"        "Ford Pantera L"      "Ferrari Dino"       
## [31] "Maserati Bora"       "Volvo 142E"
rownames(tbl)
##  [1] "Mazda RX4"           "Mazda RX4 Wag"       "Datsun 710"         
##  [4] "Hornet 4 Drive"      "Hornet Sportabout"   "Valiant"            
##  [7] "Duster 360"          "Merc 240D"           "Merc 230"           
## [10] "Merc 280"            "Merc 280C"           "Merc 450SE"         
## [13] "Merc 450SL"          "Merc 450SLC"         "Cadillac Fleetwood" 
## [16] "Lincoln Continental" "Chrysler Imperial"   "Fiat 128"           
## [19] "Honda Civic"         "Toyota Corolla"      "Toyota Corona"      
## [22] "Dodge Challenger"    "AMC Javelin"         "Camaro Z28"         
## [25] "Pontiac Firebird"    "Fiat X1-9"           "Porsche 914-2"      
## [28] "Lotus Europa"        "Ford Pantera L"      "Ferrari Dino"       
## [31] "Maserati Bora"       "Volvo 142E"
colnames(tbl)
##  [1] "mpg"  "cyl"  "disp" "hp"   "drat" "wt"   "qsec" "vs"   "am"   "gear"
## [11] "carb"
names(tbl)
##  [1] "mpg"  "cyl"  "disp" "hp"   "drat" "wt"   "qsec" "vs"   "am"   "gear"
## [11] "carb"

Creating a data frame on the fly

mpg <- c(21, 22.8, 18.7)
cyl <- c(6, 4, 8)
hp <- c(110, 93, 175)
df <- data.frame(mpg, cyl, hp)
head(df)
##    mpg cyl  hp
## 1 21.0   6 110
## 2 22.8   4  93
## 3 18.7   8 175
df$mpg
## [1] 21.0 22.8 18.7
df[,c(2:3)]
##   cyl  hp
## 1   6 110
## 2   4  93
## 3   8 175

Programming functions

IF statements

x<-2
if (x>0) {
  log.x <- log2(x)
}
log.x
## [1] 1

IF-ELSE statements

x <- 0.1
if (x>1) {
  log.x <- log2(x)
} else {
  log.x <- 0
}
log.x
## [1] 0

FOR loops

Ys <- c("drat", "disp", "hp","wt")
x <- tbl$mpg
par(mfrow=c(2,2))
for (i in 1:4) {
  y <- tbl[, Ys[i]]
  plot(x,y,xlab="mpg",ylab=Ys[i])
}

par(mfrow=c(2,1))
for (i in 1:4) {
   y <- tbl[, Ys[i]]
   plot(x,y,xlab="mpg",ylab=Ys[i])
}

Nested FOR loops

Ys <- c("drat", "disp", "hp")
Xs <- c("mpg", "gear", "carb")
par(mfrow=c(3,3))
for (i in 1:3) {
  x <- tbl[, Xs[i]]
  for (j in 1:3) {
    y <- tbl[,Ys[j]]
    plot(x,y,xlab=Xs[i], ylab=Ys[j])
  }
}

While loops

i <- 0
square <- 0
while (square <88) {
  i <- i+1
  square <- i*i
}
i-1
## [1] 9

Controlling loops

REPEAT and BREAK

Without a “break” conditional repeats are infinite loops!

i <- 0
square <- 0
repeat {
  i <- i+1
  square <- i^2
  if (square > 88) {
    break
  }
}
i-1
## [1] 9

NEXT in a FOR loop

sum <- 0
num.cars <- 0
for (i in 1:length(tbl$mpg)) {
  if (tbl$cyl[i] > 6) {
    next
  } else {
    sum <- sum + tbl$mpg[i]
    num.cars <- num.cars+1
  }
}
avg.mpg <- sum/num.cars
sum
## [1] 431.5
num.cars
## [1] 18
avg.mpg
## [1] 23.97222

Useful function: AGGREGATE

?aggregate
aggregate(mpg ~ cyl, tbl, mean)
##   cyl      mpg
## 1   4 26.66364
## 2   6 19.74286
## 3   8 15.10000
aggregate(. ~ cyl, tbl, median)
##   cyl  mpg  disp    hp  drat    wt   qsec vs am gear carb
## 1   4 26.0 108.0  91.0 4.080 2.200 18.900  1  1    4  2.0
## 2   6 19.7 167.6 110.0 3.900 3.215 18.300  1  0    4  4.0
## 3   8 15.2 350.5 192.5 3.115 3.755 17.175  0  0    3  3.5
boxplot(tbl$mpg ~ tbl$cyl)

Useful function: BY

?by
by(tbl,tbl$cyl, colMeans)
## tbl$cyl: 4
##         mpg         cyl        disp          hp        drat          wt 
##  26.6636364   4.0000000 105.1363636  82.6363636   4.0709091   2.2857273 
##        qsec          vs          am        gear        carb 
##  19.1372727   0.9090909   0.7272727   4.0909091   1.5454545 
## -------------------------------------------------------- 
## tbl$cyl: 6
##         mpg         cyl        disp          hp        drat          wt 
##  19.7428571   6.0000000 183.3142857 122.2857143   3.5857143   3.1171429 
##        qsec          vs          am        gear        carb 
##  17.9771429   0.5714286   0.4285714   3.8571429   3.4285714 
## -------------------------------------------------------- 
## tbl$cyl: 8
##         mpg         cyl        disp          hp        drat          wt 
##  15.1000000   8.0000000 353.1000000 209.2142857   3.2292857   3.9992143 
##        qsec          vs          am        gear        carb 
##  16.7721429   0.0000000   0.1428571   3.2857143   3.5000000

Useful function: SUMMARY

?summary
summary(tbl)
##       mpg             cyl             disp             hp       
##  Min.   :10.40   Min.   :4.000   Min.   : 71.1   Min.   : 52.0  
##  1st Qu.:15.43   1st Qu.:4.000   1st Qu.:120.8   1st Qu.: 96.5  
##  Median :19.20   Median :6.000   Median :196.3   Median :123.0  
##  Mean   :20.09   Mean   :6.188   Mean   :230.7   Mean   :146.7  
##  3rd Qu.:22.80   3rd Qu.:8.000   3rd Qu.:326.0   3rd Qu.:180.0  
##  Max.   :33.90   Max.   :8.000   Max.   :472.0   Max.   :335.0  
##       drat             wt             qsec             vs        
##  Min.   :2.760   Min.   :1.513   Min.   :14.50   Min.   :0.0000  
##  1st Qu.:3.080   1st Qu.:2.581   1st Qu.:16.89   1st Qu.:0.0000  
##  Median :3.695   Median :3.325   Median :17.71   Median :0.0000  
##  Mean   :3.597   Mean   :3.217   Mean   :17.85   Mean   :0.4375  
##  3rd Qu.:3.920   3rd Qu.:3.610   3rd Qu.:18.90   3rd Qu.:1.0000  
##  Max.   :4.930   Max.   :5.424   Max.   :22.90   Max.   :1.0000  
##        am              gear            carb      
##  Min.   :0.0000   Min.   :3.000   Min.   :1.000  
##  1st Qu.:0.0000   1st Qu.:3.000   1st Qu.:2.000  
##  Median :0.0000   Median :4.000   Median :2.000  
##  Mean   :0.4062   Mean   :3.688   Mean   :2.812  
##  3rd Qu.:1.0000   3rd Qu.:4.000   3rd Qu.:4.000  
##  Max.   :1.0000   Max.   :5.000   Max.   :8.000

Useful function: REPLICATE

replicate(5, seq(1:5))
##      [,1] [,2] [,3] [,4] [,5]
## [1,]    1    1    1    1    1
## [2,]    2    2    2    2    2
## [3,]    3    3    3    3    3
## [4,]    4    4    4    4    4
## [5,]    5    5    5    5    5
replicate(12, rnorm(10))
##             [,1]        [,2]        [,3]        [,4]        [,5]
##  [1,] -0.5888004  0.59497358 -0.99679836  1.04524253 -0.86341685
##  [2,]  1.0465366  1.07545058 -0.83633010  0.05015886 -0.60992582
##  [3,]  0.7199920 -0.87027552  1.68246820 -0.11759672 -0.75995239
##  [4,]  0.4523878 -1.12576206  0.94141585 -1.51774104 -0.96322539
##  [5,] -0.3395558 -0.03584291 -2.29107201 -0.62727313 -1.78127872
##  [6,]  0.1437766  1.33068843  1.37489819 -0.64441970 -0.02520603
##  [7,]  0.4874853  0.24154501 -0.04010748 -0.02975812  1.35494082
##  [8,]  1.8821449 -0.03257613 -1.17785407 -1.87267805  1.73478265
##  [9,] -2.0955304  0.18132440  0.69790416 -1.57181410  1.70029813
## [10,]  0.9050077  2.62534649 -2.14166457  0.86007959 -0.72776162
##              [,6]        [,7]       [,8]       [,9]      [,10]       [,11]
##  [1,] -0.61821648  0.73237359  1.8972047  0.3099001 -0.9885470 -1.23257311
##  [2,] -1.11113986  0.04541198 -1.6248429  1.2035544 -1.2167266  1.71986507
##  [3,] -1.40894888  0.57298086 -1.1608200  0.5404925  0.4758024 -0.43761992
##  [4,]  1.38289995  1.28419619 -2.1794365 -0.2506077  2.3087606  0.07773809
##  [5,]  0.05145302 -0.54124165 -0.3595293 -0.6901134 -0.7159997  0.56552361
##  [6,] -0.41954220  1.32300791  0.4706496  0.4339272  0.9063290 -0.49534668
##  [7,] -0.02153005  0.68085561 -0.2349828 -1.4668098 -1.4123012 -0.22539534
##  [8,] -0.26344931  0.13908305  0.6076552 -0.2016107  1.2845785 -1.20063909
##  [9,]  0.61909616  1.77300686 -0.9155459 -0.3634435  0.1943223 -0.10769318
## [10,] -0.42472702 -2.54070440 -0.7275268 -0.9189445  1.7518248 -0.45812800
##            [,12]
##  [1,] -1.2771804
##  [2,] -0.4257003
##  [3,]  0.6648337
##  [4,]  0.2231123
##  [5,]  1.1110198
##  [6,]  0.6282659
##  [7,]  0.3024816
##  [8,] -0.1638020
##  [9,]  0.4867629
## [10,]  1.4009469

User defined functions

There are lots of built-in functions in R. But, sometimes, you need some code that isn’t.

Functions are just a set of instructions(calculations) that we want to use repeatedly or that, because of their complexity, are better self-contained in a sub-program and called when needed.

Basic FUNCTION elements

function.name <- function(arguments)
{
  #computations on the arguments
  #and some other code
}
square <- function(x)
{
  x^2
}

square(40)
## [1] 1600
k <- 10
square(k)
## [1] 100

Setting a default value or range

square <- function(x=10, n=seq(0.05, 1, by=0.01))
{
  x^n
}
square(2,5)
## [1] 32
square(,2)
## [1] 100
square(2,)
##  [1] 1.035265 1.042466 1.049717 1.057018 1.064370 1.071773 1.079228
##  [8] 1.086735 1.094294 1.101905 1.109569 1.117287 1.125058 1.132884
## [15] 1.140764 1.148698 1.156688 1.164734 1.172835 1.180993 1.189207
## [22] 1.197479 1.205808 1.214195 1.222640 1.231144 1.239708 1.248331
## [29] 1.257013 1.265757 1.274561 1.283426 1.292353 1.301342 1.310393
## [36] 1.319508 1.328686 1.337928 1.347234 1.356604 1.366040 1.375542
## [43] 1.385109 1.394744 1.404445 1.414214 1.424050 1.433955 1.443929
## [50] 1.453973 1.464086 1.474269 1.484524 1.494849 1.505247 1.515717
## [57] 1.526259 1.536875 1.547565 1.558329 1.569168 1.580083 1.591073
## [64] 1.602140 1.613284 1.624505 1.635804 1.647182 1.658639 1.670176
## [71] 1.681793 1.693491 1.705270 1.717131 1.729074 1.741101 1.753211
## [78] 1.765406 1.777685 1.790050 1.802501 1.815038 1.827663 1.840375
## [85] 1.853176 1.866066 1.879045 1.892115 1.905276 1.918528 1.931873
## [92] 1.945310 1.958841 1.972465 1.986185 2.000000

Calling functions in a function

my.fun <- function(X.matrix, y.vec, z.scalar)
{
  sq.scalar <- square(z.scalar,2)
  mult <- X.matrix %*% y.vec
  final <- mult * sq.scalar
  return(final)
}

my.mat <- matrix(c(1:6),3,2)
my.vec <- c(3:4)

my.fun(my.mat, my.vec, 5)
##      [,1]
## [1,]  475
## [2,]  650
## [3,]  825

Function returning a List

my.fun <- function(X.matrix, y.vec, z.scalar) 
{
  sq.scalar <- square(z.scalar,2)
  mult <- X.matrix %*% y.vec
  final <- mult * sq.scalar
  return(list(sq.num=sq.scalar, matmult=final))
}

my.fun(my.mat, my.vec, 5)
## $sq.num
## [1] 25
## 
## $matmult
##      [,1]
## [1,]  475
## [2,]  650
## [3,]  825

Function Best Practices

APPLY functions

# (N)apply(X, MARGIN, FUN, ...)
apply(tbl, 2, sum) #sum of each column in tbl
##      mpg      cyl     disp       hp     drat       wt     qsec       vs 
##  642.900  198.000 7383.100 4694.000  115.090  102.952  571.160   14.000 
##       am     gear     carb 
##   13.000  118.000   90.000
ColMax <- function(x) apply(x,2,max)
ColMax(tbl)
##     mpg     cyl    disp      hp    drat      wt    qsec      vs      am 
##  33.900   8.000 472.000 335.000   4.930   5.424  22.900   1.000   1.000 
##    gear    carb 
##   5.000   8.000
sapply(1:3, function(x) x^2)
## [1] 1 4 9
lapply(1:3, function(x) x^2)
## [[1]]
## [1] 1
## 
## [[2]]
## [1] 4
## 
## [[3]]
## [1] 9
mapply(rep, 1:4, 4:1)
## [[1]]
## [1] 1 1 1 1
## 
## [[2]]
## [1] 2 2 2
## 
## [[3]]
## [1] 3 3
## 
## [[4]]
## [1] 4
tapply(tbl$mpg, tbl$cyl, mean)
##        4        6        8 
## 26.66364 19.74286 15.10000
r <- rainbow(3) #rainbow(n): create a vector of n contiguous colors
choose.col <- function(n) {
  colorvec <- vector(mode="character", length=length(n))
  for (i in 1:length(n)) {
    if (n[i]>3) {
      colorvec[i] <- r[1]
    }
    if (n[i]>5) {
      colorvec[i] <- r[2]
    }
    if (n[i]>7) {
      colorvec[i] <- r[3]
    }
  }
  c(colorvec)
}
col.pch <- choose.col(tbl$cyl)
cbind(tbl$cyl, col.pch)
##           col.pch    
##  [1,] "6" "#00FF00FF"
##  [2,] "6" "#00FF00FF"
##  [3,] "4" "#FF0000FF"
##  [4,] "6" "#00FF00FF"
##  [5,] "8" "#0000FFFF"
##  [6,] "6" "#00FF00FF"
##  [7,] "8" "#0000FFFF"
##  [8,] "4" "#FF0000FF"
##  [9,] "4" "#FF0000FF"
## [10,] "6" "#00FF00FF"
## [11,] "6" "#00FF00FF"
## [12,] "8" "#0000FFFF"
## [13,] "8" "#0000FFFF"
## [14,] "8" "#0000FFFF"
## [15,] "8" "#0000FFFF"
## [16,] "8" "#0000FFFF"
## [17,] "8" "#0000FFFF"
## [18,] "4" "#FF0000FF"
## [19,] "4" "#FF0000FF"
## [20,] "4" "#FF0000FF"
## [21,] "4" "#FF0000FF"
## [22,] "8" "#0000FFFF"
## [23,] "8" "#0000FFFF"
## [24,] "8" "#0000FFFF"
## [25,] "8" "#0000FFFF"
## [26,] "4" "#FF0000FF"
## [27,] "4" "#FF0000FF"
## [28,] "4" "#FF0000FF"
## [29,] "8" "#0000FFFF"
## [30,] "6" "#00FF00FF"
## [31,] "8" "#0000FFFF"
## [32,] "4" "#FF0000FF"

APPLY for repetitive application

r <- rainbow(3) #rainbow(n): create a vector of n contiguous colors
choose.col <- function(n) {
  if (n>3) {
    col.n <- r[1]
  }
  if (n>5) {
    col.n <- r[2]
  }
  if (n>7) {
    col.n <- r[3]
  }
  col.n
}

col.pch <- sapply(tbl$cyl, choose.col)
cbind(tbl$cyl, col.pch)
##           col.pch    
##  [1,] "6" "#00FF00FF"
##  [2,] "6" "#00FF00FF"
##  [3,] "4" "#FF0000FF"
##  [4,] "6" "#00FF00FF"
##  [5,] "8" "#0000FFFF"
##  [6,] "6" "#00FF00FF"
##  [7,] "8" "#0000FFFF"
##  [8,] "4" "#FF0000FF"
##  [9,] "4" "#FF0000FF"
## [10,] "6" "#00FF00FF"
## [11,] "6" "#00FF00FF"
## [12,] "8" "#0000FFFF"
## [13,] "8" "#0000FFFF"
## [14,] "8" "#0000FFFF"
## [15,] "8" "#0000FFFF"
## [16,] "8" "#0000FFFF"
## [17,] "8" "#0000FFFF"
## [18,] "4" "#FF0000FF"
## [19,] "4" "#FF0000FF"
## [20,] "4" "#FF0000FF"
## [21,] "4" "#FF0000FF"
## [22,] "8" "#0000FFFF"
## [23,] "8" "#0000FFFF"
## [24,] "8" "#0000FFFF"
## [25,] "8" "#0000FFFF"
## [26,] "4" "#FF0000FF"
## [27,] "4" "#FF0000FF"
## [28,] "4" "#FF0000FF"
## [29,] "8" "#0000FFFF"
## [30,] "6" "#00FF00FF"
## [31,] "8" "#0000FFFF"
## [32,] "4" "#FF0000FF"

Calling functions in Plot

plot(tbl$mpg, tbl$hp, col=sapply(tbl$cyl, choose.col))
legend("topright", legend=c(4,6,8), col=r, pch=1)

Calling functions

Save and Load

Export a table

Graphical outputs

Putting it all together

r <- rainbow(3) #rainbow(n): create a vector of n contiguous colors
choose.col <- function(n) {
  if (n>3) {
    col.n <- r[1]
  }
  if (n>5) {
    col.n <- r[2]
  }
  if (n>7) {
    col.n <- r[3]
  }
  col.n
}


sep.csv <- c(',') #for tab-delimited# sep.tab <- c('\t')
csv.file <- c("mtcars.csv")
tbl <- read.table(file=csv.file, sep=sep.csv, header=TRUE)

postscript(file="cool_plot.ps", paper="letter", horizontal = TRUE)
plot(tbl$mpg, tbl$hp, col=sapply(tbl$cyl, choose.col))
legend("topright", legend=c(4,6,8), col=r, pch=1)
dev.off()
## png 
##   2
mpg.cyl <- aggregate(mpg ~ cyl, tbl, mean)
write.table(mpg.cyl, file="mpg_mean_cyl.txt", sep="\t", row.names = FALSE)
png(filename = "mpg_by_cyl.png", width=480, height=480)
boxplot(tbl$mpg ~ tbl$cyl)
dev.off()
## png 
##   2

Run script

Command Line Arguments

commandArgs

#args <- commandArgs(TRUE) #get variables from command line
#num1 <- as.numeric(args[1])
#num2 <- as.numeric(args[2])
#
#square <- function (x,n=seq(1, num2, by=1)) {
#  x^n
#}
#
#x <- c(1:num2)
#y <- square(num1)
#png(filename = "exp_plot.png", width = 480, height = 480)
#plot(x,y, ylab=paste(num1, "^x", sep=""))
#dev.off()

argparse

#library(argparse)
#parser <- ArgumentParser()

#parser$add_argument("-n", "--number", type="integer", default=2, help="The number that #will be multiplied by itself", metavar="number")

#parser$add_argument("-x", "--exponent", type="integer", default=10, help="The number of #the times -n is multiplied itself (exponent)", metavar="number")

#args <- parser$parse_args()
#num1 <- args$number
#num2 <- args$exponent

#square <- function (x,n=seq(1, num2, by=1)) {
#  x^n
#}

#x <- c(1:num2)
#y <- square(num1)
#png(filename = "exp_plot2.png", width = 480, height = 480)
#plot(x,y, ylab=paste(num1, "^x", sep=""))
#dev.off()

10-Minute Break before Workshop