Modes are the primitive data types in R.
Classes are modes with additional structure.
For example, data.frame is a class based upon the list mode.
The list mode allows multiple layering and heterogeneous inclusion of many types.
As we will see, the data.frame is a flexible type that includes most features from the other built-in types.
Naturally, a data.frame is usually the final state for your data.
Methods are functions that are encapsulated with a specific class.
Attributes are data that are encapsulated with a specific class.
Attribute values typically vary between instances of a class.
# List the S3 methods registered for the "list" class.
methods(class = "list")
# The class is spelled "data.frame"; "dataframe" matches no methods at all.
methods(class = "data.frame")
# Show the datasets available in the attached packages.
data()
# Open the help page of the built-in mtcars dataset, then check its size.
?mtcars
nrow(mtcars)
ncol(mtcars)
A vector with an interface to make it behave like a matrix
matrix( v, nrow = n, ncol = m, byrow = b)
named arguments
# Build a 2 x 3 matrix from a plain vector; values fill column by column
# because byrow is left at its default.
my.vec <- 1:6
print(my.vec)
my.matrix <- matrix(my.vec, nrow = 2, ncol = 3)

# Inspect the object: a matrix is an atomic vector carrying a dim attribute.
str(my.matrix)
attributes(my.matrix)
class(my.matrix)
typeof(my.matrix)
is.atomic(my.matrix)
mode(my.matrix)
print(my.matrix)

# Transpose, and build other matrices with matrix() (my.matrix is data, not a
# function, so it cannot be called).
t(my.matrix)
matrix(0, nrow = 3, ncol = 3)
t(matrix(my.vec, nrow = 3, ncol = 2))

# Size queries.
dim(my.matrix)
length(my.matrix)
nrow(my.matrix)
ncol(my.matrix)
# Indexing is my.matrix[row, column]; an empty position selects everything.
my.matrix[2, ] # row
my.matrix[, 2] # column
my.matrix[1, 2] # element at row 1 column 2
my.matrix[, c(2, 3)] # columns 2 and 3
my.matrix[1, c(2, 3)] # row 1, columns 2 and 3
my.matrix[-1, c(2, 3)] # negative index drops row 1
my.matrix[4] # a single index addresses the underlying vector

tail(my.matrix, n = 1) # doesn't work the same
my.matrix[-length(my.matrix)] # all elements except the last one
e <- length(my.matrix)
my.matrix[-c(e, e - 1)] # all elements except the last two
# Grow the matrix: cbind() appends a column, rbind() appends a row.
my.new.matrix <- cbind(my.matrix, c(7, 8)) # append column
print(my.new.matrix)
# The new row has four values to account for the appended column.
my.new.matrix <- rbind(my.new.matrix, c(9, 10, 11, 12))

# Argument order decides where the new column ends up.
cbind(c(7, 8), my.matrix)
cbind(c(7, 8), my.matrix[, c(2, 3)], my.matrix[, c(1, 2)])

# Searching for a value.
10 %in% my.new.matrix
match(10, my.new.matrix)
my.new.matrix[10]

# which() reports positions; arr.ind = TRUE gives row/column indices.
which(my.new.matrix == 10, arr.ind = TRUE)
which(my.new.matrix == 10)
# Arithmetic operators work element by element.
my.matrix + 3
my.matrix * 3
my.matrix + my.matrix
my.matrix * my.matrix

# diag(n) builds an n x n identity matrix.
diag(4)
diag(4) * 3
my.matrix %*% t(my.matrix) # matrix multiplication

# Eigen decomposition of a random 3 x 3 matrix.
my.matrix <- matrix(runif(9), nrow = 3)
out <- eigen(my.matrix)
out$values
out$vectors
array( v, dim=c(d1,d2,d3…))
# A 3 x 3 x 3 array of random values; runif() lies in [0, 1], so the values
# fall between -20 and 0.
my.array <- array((runif(27) - 1) * 20, dim = c(3, 3, 3))
print(my.array)
dim(my.array)
# One index per dimension; an empty position selects everything.
my.array[3, , ] # row 3
my.array[, 3, ] # column 3
my.array[, , 3] # layer 3 (third dimension)
atomic vector class
best way to store categorical data
similar to atomic vector
levels - possible values
# Build a factor from a character vector.
data <- c("R", "python", "Julia", "matlab", "python", "matlab", "matlab")

my.fac <- factor(data)
print(my.fac)
levels(my.fac)
# reorder levels
my.fac <- factor(my.fac, levels = c("python", "Julia", "R", "matlab"))
print(my.fac)
levels(my.fac)
# Generate a factor by pattern with gl(n, k, labels):
# n = number of levels
# k = number of repeats
# labels = names
my.fac <- gl(4, 3, labels = levels(my.fac))
print(my.fac)
length(my.fac)
# Read and replace single elements; the new value must be an existing level.
my.fac[1]
my.fac[4] <- "matlab"
print(my.fac)
# add new level
levels(my.fac) <- c(levels(my.fac), "Perl")
# change value: the value must match a level exactly (case-sensitive),
# otherwise the element becomes NA with a warning.
my.fac[4] <- "Perl"

# rename level: select the existing level by its current name.
levels(my.fac)[levels(my.fac) == "python"] <- "Python"
print(my.fac)
levels(my.fac)
length(my.fac)
nlevels(my.fac)
# Frequency of each level, and a random draw of three elements.
table(my.fac)
summary(my.fac)
sample(my.fac, 3)
hard to use
# A named list whose elements have different types.
my.list <- list(name = c("Dave", "Mel", "Brian"), age = c(31, 32, 40))

# Single-bracket indexing returns a sub-list, by name or by position.
my.list["name"]
my.list[1]
names(my.list)
Named lists + matrices + optional factoring
numbers will
data.frame(c1,c2,…cn, args)
ARGS
# Build a data.frame from named columns; the unnamed favoriteangle vector
# becomes a column named after the variable.
favoriteangle <- c(pi, pi / 2, pi / 4)
my.data <- data.frame(
  number = 10:12,
  isblind = c(FALSE, FALSE, TRUE),
  haircolor = c("bald", "brown", "red"),
  favoriteangle,
  row.names = c("Dave", "MEL", "BRIAN"),
  stringsAsFactors = FALSE
)
print(my.data)
summary(my.data)
str(my.data) # structure
names(my.data)
rownames(my.data)
colnames(my.data)
Text columns are cast to factors by default in R < 4.0.0; since R 4.0.0, stringsAsFactors defaults to FALSE.
# Rename rows.
rownames(my.data) <- c("Dave", "Mel", "Brian")
print(my.data)
# Rename columns.
colnames(my.data) <- c("score", "isBlind", "hairColor", "favoriteAngle")
print(my.data)
# Attach a free-text comment to the object and read it back.
comment(my.data) <- "Need more subjects!"
comment(my.data)
attributes(my.data)
# Size queries.
dim(my.data)
ncol(my.data)
nrow(my.data)
length(my.data)
# First two and last three rows.
head(my.data, n = 2)
tail(my.data, n = 3)
# Import helpers. NOTE(review): the path is a placeholder, and read.xls() and
# read.spss() are not defined in this file -- presumably they come from add-on
# packages (e.g. gdata and foreign); confirm those are attached before running.
read.csv('/path/to/my/file', header=TRUE,sep=",")
read.xls()
read.spss()
# named list: each list element becomes a column
my.list.f <- as.data.frame(my.list)
print(my.list.f)
# Use the first list element as row names, then drop it as a column.
rownames(my.list.f) <- my.list[[1]]
my.list.f[, 1] <- NULL

# matrix: each matrix column becomes a data.frame column
my.mat.f <- as.data.frame(my.matrix)
print(my.mat.f)
colnames(my.mat.f) <- c("col1", "col2", "col3")
Just like matrices
Indexing creates smaller data.frames
my.data[1:3, ] # get first 3 rows
my.data[[3]] # third column as a plain vector
# A two-column matrix index picks individual cells as (row, column) pairs.
my.data[cbind(c(2, 3), c(3, 4))] # get elements at 2,3 and 3,4

my.data$score # *simple, and what makes dataframes shine
my.data["Dave", ] # row by name
my.data[, "hairColor"] # column as a vector
my.data["hairColor"] # column as a one-column data.frame
my.data["Dave", "hairColor"] # single cell

# reassign
my.data["Dave", "favoriteAngle"] <- pi / 3
print(my.data)
# MATCH
# The trailing comma makes this a row filter; without it the logical vector
# would be applied to the columns instead.
my.data[my.data$isBlind == TRUE, ]
subset(my.data, isBlind == TRUE)
subset(my.data, isBlind == TRUE | hairColor == "brown")
unique(my.data$isBlind)
duplicated(my.data$isBlind)
# Append a copy of the first row so there is a duplicate to detect.
my.data <- rbind(my.data, my.data[1, ])
print(my.data)
duplicated(my.data)
# remove duplicated
my.data <- my.data[!duplicated(my.data), ]

# reorder columns, by position or by name
my.data <- my.data[, c(1, 3, 2, 4)]
print(my.data)
my.data <- my.data[c("favoriteAngle", "isBlind", "score", "hairColor")]
# sort rows (by row name)
my.data[order(rownames(my.data)), ]

# sort columns (by column name)
my.data[, order(colnames(my.data))]

# multiple sort
#dd[with(dd, order(-z, b)), ]
# ADD COLUMNS
my.data <- cbind(my.data, Strength = c(110, 110, 110))
# Equivalent `$<-` form; it targets the same column, so no second one is added.
my.data$Strength <- c(110, 110, 110)

# add rows
# A one-row data.frame keeps each column's type; a c() vector would coerce
# every value to character. Columns are matched by name.
my.data <- rbind(
  my.data,
  data.frame(
    favoriteAngle = 0.8, isBlind = FALSE, score = 12,
    hairColor = "brown", Strength = 112,
    row.names = "Mike", stringsAsFactors = FALSE
  )
)
# RM column
my.data$Strength <- NULL
print(my.data)
# RM ROW: by name, by position, or with subset(); none of these reassign.
my.data[rownames(my.data) != "Dave", ]
my.data[-3, ]
subset(my.data, !rownames(my.data) %in% "Dave")
# Outer join: all = TRUE keeps unmatched rows from both inputs.
merge(my.data, df2, by = "CustomerId", all = TRUE)
# Left outer: all.x = TRUE keeps unmatched rows from the first input.
merge(my.data, df2, by = "CustomerId", all.x = TRUE)
# Right outer: all.y = TRUE keeps unmatched rows from the second input.
merge(my.data, df2, by = "CustomerId", all.y = TRUE)
# Cross join: by = NULL gives the Cartesian product of the two inputs.
merge(my.data, df2, by = NULL)
Arrays
apply(ARRAY,MARGIN,FUN)
# NOTE(review): my.mdata is not defined in the visible part of this file;
# presumably a numeric matrix -- confirm before running.
# By Row (MARGIN = 1)
apply(my.mdata, 1, sum)
# By Column (MARGIN = 2)
apply(my.mdata, 2, sum)
# By cell (MARGIN = 1:2)
apply(my.mdata, 1:2, function(x) x + 3)
Lists & Vectors
lapply(LIST,FUN)
sapply(X,FUN)
vapply(X,FUN,TYPE)
# list input: lapply() always returns a list
lapply(list(1, 2, 3), sum)
# vector input, converted to a list first
lapply(as.list(c(1, 2, 3)), sum)
# simplified: sapply() collapses the result to a vector when it can
sapply(list(1, 2, 3), sum)
# simplified
vapply