R Programming Quick Notes :: Part - 3


Bhaskar S 03/18/2017


Overview

In Part - 2, we explored the collection types vector, list, matrix, and data.frame in R.

In this part, we will dive into factor type, date and time types, control structures, and functions.

Hands-on With R - III

Factor Type

Let us get started with the factor class in R.

A factor is an object that represents categorical data (finite discrete values) in R. Factors are used quite extensively in statistical modeling. We use the factor() function to create a factor object in R.

As an example, to create a factor object that represents the level of education, execute the following in R:

a <- factor(c('HS', 'BS', 'MS', 'PHD'))

a

The following is the typical output:

Output (a)

> a
[1] HS  BS  MS  PHD
Levels: BS HS MS PHD
      

From the above output we see the order of factor levels is based on lexical ordering. To set the desired order of levels, use the levels argument in the factor() function as shown below:

a <- factor(c('HS', 'BS', 'MS', 'PHD'), levels = c('HS', 'BS', 'MS', 'PHD'))

a

The following is the typical output:

Output (a)

> a
[1] HS  BS  MS  PHD
Levels: HS BS MS PHD
      

Date and Time Types

Let us now shift gears to explore Date, POSIXlt, and POSIXct types in R.

Let us create the following R script named date_time_ops.R in RStudio:

date_time_ops.R
#
# Date and Time operations
#
# Creates values of the Date, POSIXct, and POSIXlt classes and inspects each
# one with class() (to show its type) and unclass() (to show the underlying
# representation once the class attribute is removed).
#

# Current date as a Date object
a <- Sys.Date()
a

class(a)

# Underlying value of a Date: a day count relative to 1970-01-01
unclass(a)

# Parse a date string given in the default year-month-day layout
b <- as.Date("1970-01-02")

class(b)

# One day after 1970-01-01
unclass(b)

# Parse a date string in a custom layout (month/day/4-digit year) via 'format'
# NOTE(review): 'c' is also the name of base R's c() function; fine for a
# short demo, but consider another name in real scripts
c <- as.Date("02/01/1970", format = "%m/%d/%Y")

class(c)

unclass(c)

# Current date and time
d <- Sys.time()
d

class(d)

# Underlying value of the current date-time: a single number of seconds
unclass(d)

# Convert the date-time into the POSIXlt (broken-down) representation
e <- as.POSIXlt(d)
e

class(e)

# Shows the individual named components (sec, min, hour, mday, ...)
unclass(e)

# Individual components are accessed with the $ operator
e$hour

# Convert to POSIXct; d is already the value returned by Sys.time()
f <- as.POSIXct(d)
f

class(f)

unclass(f)

# Parse a date-time string using an explicit format:
#   %a weekday name, %B month name, %d day of month, %Y 4-digit year,
#   %H:%M:%S hours, minutes, seconds
g <- strptime('Sun January 25, 1970 16:30:55', '%a %B %d, %Y %H:%M:%S')
g

class(g)

# Shows the named components of the parsed date-time
unclass(g)
      

Execute the R script date_time_ops.R in RStudio and the following is the output:

Output (date_time_ops.R)

> a <- Sys.Date()
> a
[1] "2017-03-17"
>
> class(a)
[1] "Date"
>
> unclass(a)
[1] 17242
>
> b <- as.Date("1970-01-02")
>
> class(b)
[1] "Date"
>
> unclass(b)
[1] 1
>
> c <- as.Date("02/01/1970", format = "%m/%d/%Y")
>
> class(c)
[1] "Date"
>
> unclass(c)
[1] 31
>
> d <- Sys.time()
> d
[1] "2017-03-17 19:24:10 EDT"
>
> class(d)
[1] "POSIXct" "POSIXt"
>
> unclass(d)
[1] 1489793050
>
> e <- as.POSIXlt(d)
> e
[1] "2017-03-17 19:24:10 EDT"
>
> class(e)
[1] "POSIXlt" "POSIXt"
>
> unclass(e)
$sec
[1] 10.41346

$min
[1] 24

$hour
[1] 19

$mday
[1] 17

$mon
[1] 2

$year
[1] 117

$wday
[1] 5

$yday
[1] 75

$isdst
[1] 1

$zone
[1] "EDT"

$gmtoff
[1] -14400

attr(,"tzone")
[1] ""    "EST" "EDT"
>
> e$hour
[1] 19
>
> f <- as.POSIXct(d)
> f
[1] "2017-03-17 19:24:10 EDT"
>
> class(f)
[1] "POSIXct" "POSIXt"
>
> unclass(f)
[1] 1489793050
>
> g <- strptime('Sun January 25, 1970 16:30:55', '%a %B %d, %Y %H:%M:%S')
> g
[1] "1970-01-25 16:30:55 EST"
>
> class(g)
[1] "POSIXlt" "POSIXt"
>
> unclass(g)
$sec
[1] 55

$min
[1] 30

$hour
[1] 16

$mday
[1] 25

$mon
[1] 0

$year
[1] 70

$wday
[1] 0

$yday
[1] 24

$isdst
[1] 0

$zone
[1] "EST"

$gmtoff
[1] NA
      

The Sys.Date() function returns the current day as an object of type Date that represents the number of days since 1970-01-01.

The as.Date() function takes a character string in the format %Y-%m-%d, where %Y represents the 4-digit year, %m represents the month (01 through 12), and %d the day (01 through 31), and returns an object of type Date. One can also specify a custom format string using the format argument.

The Sys.time() function returns the current time as an object of type POSIXct that represents the number of seconds since 1970-01-01.

The as.POSIXlt() function takes an object of type POSIXct as the input and returns an object of type POSIXlt that is a vector of named attributes such as sec (for seconds), min (for minutes), hour (for hour), mday (for day of the month), mon (for month 0 through 11), year (for years since 1900), wday (for the day of the week 0 through 6, 0 for Sunday), yday (for day of the year), etc. One can access the named attributes using the syntax $attr (Ex: e$hour, where e is an object of type POSIXlt).

The strptime() function takes two arguments - date-time in a character string format and a format string and returns an object of type POSIXlt.

Control Structures

Let us now move on to explore the various control structures in R.

The following are the supported if statements in R: if, if-else, if-else if-else, and the ifelse() function.

Let us create the following R script named if_statements.R in RStudio:

if_statements.R
#
# if, if-else, if-else if-else, ifelse statements
#
# Demonstrates each conditional form once, using two fixed numbers.
#

# Sample values tested by the conditions below
a <- 4
b <- 7

# Plain if: the body runs only when the condition is TRUE
if (a <= 5) {
  print('a is <= 5')
}

# if-else: %% is the modulo operator, so the test checks whether b is even
if (b %% 2 == 0) {
  print('b is divisible by 2')
} else {
  print('b is NOT divisible by 2')
}

# if-else if-else: conditions are tried top to bottom; the first TRUE one wins
# NOTE(review): the final else runs for any a > 6; the "<= 10" in its message
# is not actually checked
if (a <= 3) {
  print('a is <= 3')
} else if (a <= 6) {
  print('a is > 3 and <= 6')
} else {
  print('a is > 6 and <= 10')
}

# ifelse(test, yes, no): yields 2*a when b > 5, otherwise 3*a
ifelse(b > 5, 2*a, 3*a)
      

Execute the R script if_statements.R in RStudio and the following is the output:

Output (if_statements.R)

> a <- 4
> b <- 7
>
> if (a <= 5) {
+   print('a is <= 5')
+ }
[1] "a is <= 5"
>
> if (b %% 2 == 0) {
+   print('b is divisible by 2')
+ } else {
+   print('b is NOT divisible by 2')
+ }
[1] "b is NOT divisible by 2"
>
> if (a <= 3) {
+   print('a is <= 3')
+ } else if (a <= 6) {
+   print('a is > 3 and <= 6')
+ } else {
+   print('a is > 6 and <= 10')
+ }
[1] "a is > 3 and <= 6"
>
> ifelse(b > 5, 2*a, 3*a)
[1] 8
      

The following are the supported loop statements in R: for, while, and repeat.

Let us create the following R script named loop_statements.R in RStudio:

loop_statements.R
#
# for, while, repeat statements
#
# Demonstrates each looping construct once.
#

# The integers 1 through 5
a <- 1:5

# for: the body runs once per element of a
for (i in a) {
  print(paste('for: i ->', i))
}

# 10 values sampled from 1 through 100; the values differ from run to run
b <- sample(1:100, 10)

# Tally how many of the sampled values are even and how many are odd
even <- 0
odd <- 0
for (x in b) {
  if (x %% 2 == 0) {
    even <- even + 1
  } else {
    odd <- odd + 1
  }
}
print(paste('There are', even, 'evens and', odd, 'odds'))

# while: the condition is checked before each pass, so this prints 0 through 5
count <- 0
while (count <= 5) {
  print(paste('while: count ->', count))
  count <- count + 1
}

# repeat: loops with no condition of its own; break is the only way out.
# The check happens after the increment, so this prints 1 through 4.
count <- 1
repeat {
  print(paste('repeat: count ->', count))
  count <- count + 1
  if (count >= 5) {
    break
  }
}
      

Execute the R script loop_statements.R in RStudio and the following is the output:

Output (loop_statements.R)

> a <- 1:5
>
> for (i in a) {
+   print(paste('for: i ->', i))
+ }
[1] "for: i -> 1"
[1] "for: i -> 2"
[1] "for: i -> 3"
[1] "for: i -> 4"
[1] "for: i -> 5"
>
> b <- sample(1:100, 10)
>
> even <- 0
> odd <- 0
> for (x in b) {
+   if (x %% 2 == 0) {
+     even <- even + 1
+   } else {
+     odd <- odd + 1
+   }
+ }
> print(paste('There are', even, 'evens and', odd, 'odds'))
[1] "There are 2 evens and 8 odds"
>
> count <- 0
> while (count <= 5) {
+   print(paste('while: count ->', count))
+   count <- count + 1
+ }
[1] "while: count -> 0"
[1] "while: count -> 1"
[1] "while: count -> 2"
[1] "while: count -> 3"
[1] "while: count -> 4"
[1] "while: count -> 5"
>
> count <- 1
> repeat {
+   print(paste('repeat: count ->', count))
+   count <- count + 1
+   if (count >= 5) {
+     break
+   }
+ }
[1] "repeat: count -> 1"
[1] "repeat: count -> 2"
[1] "repeat: count -> 3"
[1] "repeat: count -> 4"
      

Functions

Finally, we are ready to explore functions in R.

To define a function in R, use the following syntax:

  function(arg1, arg2, arg3 = val3, ...) { body }

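The following are some of the facts about functions in R, each illustrated by the script below: a function is an object (of class function) that can be assigned to a variable; the value of the last evaluated expression in the body is the return value; arguments can be given default values and can be passed by position or by name; and a function can return another function.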

Let us create the following R script named function_usage.R in RStudio:

function_usage.R
#
# function usage
#
# Demonstrates defining and calling functions: no arguments, positional
# arguments, default and named arguments, and a function that returns
# another function.
#

# a: takes no arguments; prints the current time and working directory
a <- function() {
  x <- Sys.time()
  y <- getwd()
  print(sprintf('----- [ Current environment ] -----'))
  print(sprintf('    Current time: %s', x))
  print(sprintf('    Current working directory: %s', y))
  print(sprintf('-----------------------------------'))
}

a()

# The object bound to a is of class "function"
class(a)

# b: prints both arguments, then returns sqrt(x^2 + y^2); the value of the
# last expression in the body is the function's result
b <- function(x, y) {
  print(sprintf('Side 1 of triangle - %s', x))
  print(sprintf('Side 2 of triangle - %s', y))
  sqrt(x ^ 2 + y ^ 2)
}

b(5, 6)

# c: prints both arguments, then returns x raised to the power y; y defaults
# to 2 when the caller omits it
# NOTE(review): this definition masks base R's c() function in the workspace
# where the script runs; consider another name outside of a demo
c <- function(x, y = 2) {
  print(sprintf('Input number - %s', x))
  print(sprintf('Raise to the power - %s', y))
  x ^ y
}

# y omitted: the default of 2 is used
c(5)

# Both arguments passed by position
c(5, 3)

# Argument passed by name; y still takes its default
c(x = 6)

# Both arguments passed by name
c(x = 6, y = 4)

# d: returns a new function of x whose exponent p defaults to the y that was
# passed to d
d <- function(y) {
  function(x, p = y) {
    x ^ p
  }
}

# Two functions built from d: one with default exponent 2, one with 3
p2 <- d(2)
p3 <- d(3)

p2(5)

p3(5)
      

Execute the R script function_usage.R in RStudio and the following is the output:

Output (function_usage.R)

> a <- function() {
+   x <- Sys.time()
+   y <- getwd()
+   print(sprintf('----- [ Current environment ] -----'))
+   print(sprintf('    Current time: %s', x))
+   print(sprintf('    Current working directory: %s', y))
+   print(sprintf('-----------------------------------'))
+ }
>
> a()
[1] "----- [ Current environment ] -----"
[1] "    Current time: 2017-03-18 13:41:26"
[1] "    Current working directory: /home/bswamina/MyProjects/R"
[1] "-----------------------------------"
>
> class(a)
[1] "function"
>
> b <- function(x, y) {
+   print(sprintf('Side 1 of triangle - %s', x))
+   print(sprintf('Side 2 of triangle - %s', y))
+   sqrt(x ^ 2 + y ^ 2)
+ }
>
> b(5, 6)
[1] "Side 1 of triangle - 5"
[1] "Side 2 of triangle - 6"
[1] 7.81025
>
> c <- function(x, y = 2) {
+   print(sprintf('Input number - %s', x))
+   print(sprintf('Raise to the power - %s', y))
+   x ^ y
+ }
>
> c(5)
[1] "Input number - 5"
[1] "Raise to the power - 2"
[1] 25
>
> c(5, 3)
[1] "Input number - 5"
[1] "Raise to the power - 3"
[1] 125
>
> c(x = 6)
[1] "Input number - 6"
[1] "Raise to the power - 2"
[1] 36
>
> c(x = 6, y = 4)
[1] "Input number - 6"
[1] "Raise to the power - 4"
[1] 1296
>
> d <- function(y) {
+   function(x, p = y) {
+     x ^ p
+   }
+ }
>
> p2 <- d(2)
> p3 <- d(3)
>
> p2(5)
[1] 25
>
> p3(5)
[1] 125
      

More to come in Part-4 ...

References

R Programming Quick Notes :: Part - 1

R Programming Quick Notes :: Part - 2