R Part 3 - Classes: Modes with structure

Dave White

Intro

Modes are the primitive datatype in R.
Classes are modes with additional structure.
For example, data.frame is a class based upon the list mode.

The list mode allows multiple layering and heterogeneous inclusion of many types.
As we will see, the data.frame is a flexible type that includes most features from the other builtin types.
Naturally, a data.frame is usually the final state for your data.

Methods

Methods are functions that are encapsulated with a specific class.
Attributes are data that are encapsulated with a specific class.
Attribute values typically vary between instances of a class.

# List the S3 methods registered for a class.
methods(class = "list")
# The class is spelled "data.frame"; a bare `dataframe` matches no class.
methods(class = "data.frame")

Datasets

# Browse the datasets that ship with the attached packages.
data()
# Open the help page for one of them.
help("mtcars")
# Number of rows, then number of columns.
nrow(mtcars)
ncol(mtcars)

Vector Classes

Matrices

A vector with an interface to make it behave like a matrix

Function

matrix( v, nrow = n, ncol = m, byrow = b)
named arguments

Creation

# Start from a plain integer vector.
my.vec <- 1:6
print(my.vec)
# The values are filled column by column unless byrow = TRUE.
my.matrix <- matrix(my.vec, nrow = 2, ncol = 3)
str(my.matrix)
attributes(my.matrix)

# A matrix is still an atomic vector underneath; the dim attribute is what
# makes it behave like a matrix.
class(my.matrix)
typeof(my.matrix)
is.atomic(my.matrix)
mode(my.matrix)

print(my.matrix)
t(my.matrix) # transpose

# A 3 x 3 matrix of zeros. The constructor is matrix(); my.matrix is data,
# not a function, so it cannot be called.
matrix(0, nrow = 3, ncol = 3)

t(matrix(my.vec, nrow = 3, ncol = 2))

dim(my.matrix)
length(my.matrix) # total number of elements
nrow(my.matrix)
ncol(my.matrix)

Indexing

my.matrix[2, ]           # second row
my.matrix[, 2]           # second column
my.matrix[1, 2]          # single element: row 1, column 2
my.matrix[, c(2, 3)]     # columns 2 and 3
my.matrix[1, c(2, 3)]    # row 1, columns 2 and 3
my.matrix[-1, c(2, 3)]   # negative index drops row 1
my.matrix[4]             # one index: treat the matrix as its underlying vector

tail(my.matrix, n = 1)   # returns the last row, not the last element
my.matrix[-length(my.matrix)] # every element except the last
e <- length(my.matrix)
my.matrix[-c(e, e - 1)]  # every element except the last two

Append

# cbind() appends a column; the vector supplies one value per row.
my.new.matrix <- cbind(my.matrix, c(7, 8))
print(my.new.matrix)
# rbind() appends a row; four values are needed now that a column was added.
my.new.matrix <- rbind(my.new.matrix, c(9, 10, 11, 12))

# Argument order decides position: here the new column comes first.
cbind(c(7, 8), my.matrix)
# Vectors and sub-matrices can be bound together in one call.
cbind(c(7, 8), my.matrix[, c(2, 3)], my.matrix[, c(1, 2)])

Match

10 %in% my.new.matrix      # is the value present anywhere?
match(10, my.new.matrix)   # linear (column-major) position of the first match
my.new.matrix[10]          # the element stored at linear position 10

which(my.new.matrix == 10, arr.ind = TRUE) # row/column coordinates of matches
which(my.new.matrix == 10)                 # linear positions of matches

Math

# Arithmetic with a scalar or a same-shaped matrix is element-wise.
my.matrix + 3
my.matrix * 3
my.matrix + my.matrix
my.matrix * my.matrix # element-wise product, not matrix multiplication

# diag(n) builds the n x n identity matrix.
diag(4)
diag(4) * 3

my.matrix %*% t(my.matrix) # matrix multiplication

# Eigen-decomposition of a random 3 x 3 matrix.
my.matrix <- matrix(runif(9), nrow = 3)
out <- eigen(my.matrix)
out$values
out$vectors

Arrays

array( v, dim=c(d1,d2,d3…))

# 27 uniform draws shifted and scaled into [-20, 0], arranged as 3 x 3 x 3.
my.array <- array((runif(27) - 1) * 20, dim = c(3, 3, 3))
print(my.array)
dim(my.array)
my.array[3, , ] # row 3 of every layer
my.array[, 3, ] # column 3 of every layer
my.array[, , 3] # layer 3: the third 3 x 3 slice (third index), not a column

Factors

atomic vector class
best way to store categorical data

similar to atomic vector
levels - possible values

data <- c("R", "python", "Julia", "matlab", "python", "matlab", "matlab")

# factor() collects the distinct values as levels (sorted by default).
my.fac <- factor(data)
print(my.fac)

levels(my.fac)
# reorder levels
my.fac <- factor(my.fac, levels = c("python", "Julia", "R", "matlab"))
print(my.fac)
levels(my.fac)

# gl(n, k, labels) generates a factor from a regular pattern:
# n = number of levels
# k = number of repeats of each level
# labels = names of the levels
my.fac <- gl(4, 3, labels = levels(my.fac))
print(my.fac)

length(my.fac)
my.fac[1]
# change value: the new value must already be one of the levels
my.fac[4] <- "matlab"
print(my.fac)


# add new level
levels(my.fac) <- c(levels(my.fac), "Perl")
# Level names are case-sensitive: assigning "perl" here would store NA and
# raise an "invalid factor level" warning, so use the exact level name.
my.fac[4] <- "Perl"

# rename level: every element that had the old level follows the rename
levels(my.fac)[levels(my.fac) == "Perl"] <- "perl"
print(my.fac)

levels(my.fac)
length(my.fac)
nlevels(my.fac)

# counts per level
table(my.fac)
summary(my.fac)

# random draw of 3 elements; the result is still a factor
sample(my.fac, 3)

List Classes

Named List

hard to use

# A list whose elements carry names; each element can hold a different type.
my.list <- list(
  name = c("Dave", "Mel", "Brian"),
  age = c(31, 32, 40)
)

my.list["name"] # select by name: a one-element list
my.list[1]      # select by position: the same one-element list
names(my.list)

data.frame

Named lists + matrices + optional factoring

Calling

numbers will
data.frame(c1,c2,…cn, args)
ARGS

Create

favoriteangle <- c(pi, pi / 2, pi / 4)
# Columns can be given as name = value pairs or as existing variables, in
# which case the variable name becomes the column name.
my.data <- data.frame(
  number = 10:12,
  isblind = c(FALSE, FALSE, TRUE),
  haircolor = c("bald", "brown", "red"),
  favoriteangle,
  row.names = c("Dave", "MEL", "BRIAN"),
  stringsAsFactors = FALSE # keep text columns as character
)

print(my.data)
summary(my.data)
str(my.data) # compact view of the structure: one line per column

names(my.data)    # column names
rownames(my.data)
colnames(my.data) # same as names() for a data.frame

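Text columns are cast to factors by default only in R versions before 4.0; since R 4.0 the default is stringsAsFactors = FALSE, which the example above also sets explicitly.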

Add info

# RENAME ROWS
rownames(my.data) <- c("Dave", "Mel", "Brian")
print(my.data)

# RENAME COLUMNS
colnames(my.data) <- c("score", "isBlind", "hairColor", "favoriteAngle")
print(my.data)
# Attach a free-text note to the object, then read it back.
comment(my.data) <- "Need more subjects!"
comment(my.data)

Summarize

attributes(my.data)   # everything stored alongside the columns
dim(my.data)          # rows, then columns
ncol(my.data)
nrow(my.data)
length(my.data)       # number of columns: a data.frame is a list of columns
head(my.data, n = 2)  # first 2 rows
tail(my.data, n = 3)  # last 3 rows

File to data.frame

# Read a comma-separated text file with a header row into a data.frame.
# The path is a placeholder.
read.csv('/path/to/my/file', header=TRUE,sep=",")
# NOTE(review): read.xls() is not part of base R -- presumably gdata::read.xls;
# as written it is called without a file argument. Confirm the package.
read.xls()
# NOTE(review): read.spss() is not part of base R -- presumably
# foreign::read.spss; as written it is called without a file argument.
read.spss()

Convert to data.frame

# named list -> data.frame: each list element becomes a column
my.list.f <- as.data.frame(my.list)
print(my.list.f)

# Use the first element (the names) as row labels, then drop that column.
rownames(my.list.f) <- my.list[[1]]
my.list.f[, 1] <- NULL

# matrix -> data.frame: each matrix column becomes a column
my.mat.f <- as.data.frame(my.matrix)
print(my.mat.f)
colnames(my.mat.f) <- c("col1", "col2", "col3")

Index

Just like matrices
Indexing creates smaller data.frames

my.data[1:3, ]            # get first 3 rows
my.data[[3]]              # third column, extracted as a plain vector
my.data[c(2, 3), c(3, 4)] # rows 2-3 crossed with columns 3-4 (a smaller data.frame)

my.data$score             # *simple, and what makes dataframes shine
my.data["Dave", ]           # one row, by row name
my.data[, "hairColor"]      # one column as a vector
my.data["hairColor"]        # one column, kept as a data.frame
my.data["Dave", "hairColor"] # single cell

# reassign
my.data["Dave", "favoriteAngle"] <- pi / 3
print(my.data)

Match

# MATCH
# Keep the rows where isBlind is TRUE. The trailing comma matters: with a
# single index the logical vector would select columns instead of rows.
my.data[my.data$isBlind == TRUE, ]
# subset() evaluates the condition inside the data.frame, so column names
# can be used directly.
subset(my.data, isBlind == TRUE)
subset(my.data, isBlind == TRUE | hairColor == "brown")

unique(my.data$isBlind)
duplicated(my.data$isBlind) # TRUE where a value has been seen earlier


# Append a copy of the first row to create a duplicate.
my.data <- rbind(my.data, my.data[1, ])
print(my.data)
duplicated(my.data) # compares whole rows
# remove duplicated
my.data <- my.data[!duplicated(my.data), ]

Reorder

# reorder columns, by position
my.data <- my.data[, c(1, 3, 2, 4)]
print(my.data)
# reorder columns, by name
my.data <- my.data[c("favoriteAngle", "isBlind", "score", "hairColor")]


# sort rows (alphabetically by row name)
my.data[order(rownames(my.data)), ]

# sort columns (alphabetically by column name)
my.data[, order(colnames(my.data))]

# multiple sort
#dd[with(dd, order(-z, b)), ]

ADD

# ADD COLUMNS
# Either bind a new named column ...
my.data <- cbind(my.data, Strength = c(110, 110, 110))
# ... or assign to a column name that does not exist yet.
my.data$strength <- c(110, 110, 110)

# add rows
# Bind a one-row data.frame rather than a c() vector: c() would coerce every
# value to character and turn all columns into text.
my.data <- rbind(
  my.data,
  data.frame(
    favoriteAngle = 0.8,
    isBlind = FALSE,
    score = 12L,
    hairColor = "brown",
    Strength = 112,
    strength = 112,
    row.names = "Mike"
  )
)

RM

# RM column
my.data$strength <- NULL
print(my.data)


# RM ROW
my.data[rownames(my.data)!='Dave',]
my.data[-3,]
subset(my.data, !rownames(my.data) %in% "Dave")

Merge

#Outer join:
merge(x = my.data, y = df2, by = "CustomerId", all = TRUE)
#Left outer:
merge(x = my.data, y = df2, by = "CustomerId", all.x = TRUE)
#Right outer:
merge(x = my.data, y = df2, by = "CustomerId", all.y = TRUE)
#Cross join:
merge(x = my.data, y = df2, by = NULL)

Apply: Looping without loops

  1. Arrays

    apply(ARRAY,MARGIN,FUN)

    # By Row (MARGIN = 1): one result per row
    apply(my.matrix, 1, sum)

    # By Column (MARGIN = 2): one result per column
    apply(my.matrix, 2, sum)

    # By cell (MARGIN = 1:2): the function sees one element at a time
    apply(my.matrix, 1:2, function(x) x + 3)
  2. Lists & Vectors

    lapply(LIST,FUN)

    sapply(X,FUN)

    vapply(X,FUN,TYPE)

    # list in, list out
    lapply(list(1, 2, 3), sum)

    # vectors: converted to a list first, the result is still a list
    lapply(as.list(c(1, 2, 3)), sum)

    # simplified: collapses the result to a vector when it can
    sapply(list(1, 2, 3), sum)

    # simplified, with the result type declared up front (one number per
    # element); errors instead of silently changing shape
    vapply(list(1, 2, 3), sum, numeric(1))