
Random Forests

YIK LUN, KEI


allen29@ucla.edu
This paper is a lab from the book An Introduction to Statistical Learning with Applications in R. All R code and comments below belong to the book and its authors.

Bagging
mtry=13 indicates that all 13 predictors should be considered for each split of the tree; in other words, that bagging should be done.

library(MASS)
library(randomForest)

## randomForest 4.6-10
## Type rfNews() to see new features/changes/bug fixes.
attach(Boston)
set.seed(1)
train = sample(1:nrow(Boston), nrow(Boston)/2)
boston.test = Boston[-train, "medv"]
bag.boston = randomForest(medv~., data=Boston, subset=train, mtry=13, importance=TRUE)
bag.boston

##
## Call:
##  randomForest(formula = medv ~ ., data = Boston, mtry = 13, importance = TRUE,      subset = train)
##                Type of random forest: regression
##                      Number of trees: 500
## No. of variables tried at each split: 13
##
##           Mean of squared residuals: 11.08966
##                     % Var explained: 86.57

plot(bag.boston)

[Figure: plot(bag.boston), error versus the number of trees (up to 500).]

yhat.bag=predict(bag.boston,newdata=Boston[-train,])
plot(yhat.bag, boston.test)
abline(0,1)

[Figure: plot(yhat.bag, boston.test), predicted versus observed test values, with the 45-degree line from abline(0,1).]

mean((yhat.bag-boston.test)^2)

## [1] 13.33831
bag.boston = randomForest(medv~., data=Boston, subset=train, mtry=13, importance=T, ntree=25)
bag.boston

##
## Call:
##  randomForest(formula = medv ~ ., data = Boston, mtry = 13, importance = T,      ntree = 25, subset = train)
##                Type of random forest: regression
##                      Number of trees: 25
## No. of variables tried at each split: 13
##
##           Mean of squared residuals: 12.92873
##                     % Var explained: 84.35

yhat.bag = predict(bag.boston, newdata=Boston[-train,])
mean((yhat.bag - boston.test)^2)

## [1] 14.41793

Random Forests

The loop below searches over mtry; the test MSE is lowest around mtry = 5, 6, or 7.


set.seed(2)
MSE <- rep(0,13)
for(i in 1:13){ # find the best mtry
  bag.boston = randomForest(medv~., data=Boston, subset=train, mtry=i, importance=T)
  yhat.bag = predict(bag.boston, newdata=Boston[-train,])
  MSE[i] <- mean((yhat.bag - boston.test)^2)
}
plot(1:13,MSE,type="b")

[Figure: test MSE versus mtry (1 to 13).]
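As an alternative sketch (not part of the original lab), the randomForest package also provides tuneRF(), which searches over mtry using the out-of-bag error rather than a held-out test set:

set.seed(2)
tune.out <- tuneRF(x = Boston[train, -14], y = Boston[train, "medv"], # medv is column 14
                   mtryStart = 4, ntreeTry = 500, stepFactor = 1.5, improve = 0.01)
tune.out # matrix of the mtry values tried and their OOB errors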

rf.boston = randomForest(medv~., data=Boston, subset=train, mtry=6, importance=TRUE)


yhat.rf=predict(rf.boston,newdata=Boston[-train,])
mean((yhat.rf-boston.test)^2)
## [1] 11.35209
importance(rf.boston)
##           %IncMSE IncNodePurity
## crim    12.421853    1126.72987
## zn       3.435084      45.48735
## indus    9.875513    1057.45440
## chas     1.760632      62.54896
## nox     13.420937    1029.92004
## rm      31.822013    6463.62153
## age     10.611374     532.12941
## dis     13.838517    1268.48617
## rad      3.148078      88.94067
## tax      8.404814     483.25198
## ptratio 11.932105     863.51232
## black    7.215369     386.88513
## lstat   29.282301    7140.69398

Two importance measures are reported: %IncMSE, the mean decrease in accuracy of out-of-bag predictions when that variable is permuted, and IncNodePurity, the total decrease in node impurity (residual sum of squares for regression) from splits on that variable, averaged over all trees.

varImpPlot(rf.boston)

[Figure: varImpPlot(rf.boston), two dotcharts ranking the variables by %IncMSE (left) and IncNodePurity (right); rm and lstat are the most important variables by both measures.]

Boosting
For a classification problem, use distribution="bernoulli".
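For illustration only (a sketch, not part of the original lab; the 0/1 variable high and the cutoff medv > 25 are invented here), a classification fit could look like:

library(gbm)
Boston.cls <- Boston
Boston.cls$high <- as.numeric(Boston.cls$medv > 25) # hypothetical binary response
Boston.cls$medv <- NULL # drop the numeric response
boost.class <- gbm(high ~ ., data = Boston.cls[train, ],
                   distribution = "bernoulli", n.trees = 5000, interaction.depth = 4)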
We can also produce partial dependence plots for the two most important variables, rm and lstat. These plots illustrate the marginal effect of the selected variables on the response after integrating out the other variables. In this case, as we might expect, median house prices are increasing with rm and decreasing with lstat.

library(gbm)
## Loading required package: survival
## Loading required package: lattice
## Loading required package: splines
## Loading required package: parallel
## Loaded gbm 2.1.1


set.seed(1)
boost.boston = gbm(medv~., data=Boston[train,], distribution="gaussian", n.trees=5000, interaction.depth=4)
summary(boost.boston)

[Figure: barplot of the relative influence of each predictor, produced by summary(boost.boston).]

##             var    rel.inf
## lstat     lstat 45.9627334
## rm           rm 31.2238187
## dis         dis  6.8087398
## crim       crim  4.0743784
## nox         nox  2.5605001
## ptratio ptratio  2.2748652
## black     black  1.7971159
## age         age  1.6488532
## tax         tax  1.3595005
## indus     indus  1.2705924
## chas       chas  0.8014323
## rad         rad  0.2026619
## zn           zn  0.0148083


plot(boost.boston, i="rm") # partial dependence plot (relation between medv and rm)

[Figure: partial dependence plot of rm; f(rm) increases with rm.]
plot(boost.boston, i="lstat")

[Figure: partial dependence plot of lstat; f(lstat) decreases with lstat.]

yhat.boost = predict(boost.boston, newdata=Boston[-train,], n.trees=5000)


mean((yhat.boost-boston.test)^2)

## [1] 11.84434

MSE<-rep(0,7)
lambda<-c(0.00001,0.0001,0.001,0.01,0.1,0.15,0.2)
for(i in 1:7){ # find the best shrinkage value
  boost.boston = gbm(medv~., data=Boston[train,], distribution="gaussian",
                     n.trees=5000, interaction.depth=4, shrinkage=lambda[i])
  yhat.boost = predict(boost.boston, newdata=Boston[-train,], n.trees=5000)
  MSE[i] <- mean((yhat.boost - boston.test)^2)
}
plot(1:7,MSE,type="b")

[Figure: test MSE for each of the seven candidate shrinkage values.]

MSE<-rep(0,8)
size<-c(500,1000,2000,4000,5000,7000,8000,10000)
for(i in 1:8){ # find the best number of trees
  boost.boston = gbm(medv~., data=Boston[train,], distribution="gaussian",
                     n.trees=size[i], interaction.depth=4)
  yhat.boost = predict(boost.boston, newdata=Boston[-train,], n.trees=size[i])
  MSE[i] <- mean((yhat.boost - boston.test)^2)
}
plot(1:8,MSE,type="b")

[Figure: test MSE for each of the eight candidate values of n.trees.]

boost.boston = gbm(medv~., data=Boston[train,], distribution="gaussian", n.trees=10000, interaction.depth=4)


yhat.boost = predict(boost.boston, newdata=Boston[-train,], n.trees=5000) # uses the first 5000 of the 10000 fitted trees
mean((yhat.boost - boston.test)^2)

## [1] 10.39679
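Instead of looping over candidate values by hand, gbm can also estimate the optimal number of trees internally; a sketch (not part of the original lab) using 5-fold cross-validation via the cv.folds argument and gbm.perf():

set.seed(1)
boost.cv <- gbm(medv ~ ., data = Boston[train, ], distribution = "gaussian",
                n.trees = 10000, interaction.depth = 4, cv.folds = 5)
best.iter <- gbm.perf(boost.cv, method = "cv") # CV-chosen number of trees
yhat.cv <- predict(boost.cv, newdata = Boston[-train, ], n.trees = best.iter)
mean((yhat.cv - boston.test)^2)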

Reference:
James, Gareth, et al. An Introduction to Statistical Learning: with Applications in R. New York: Springer, 2013.
