You are on page 1 of 9

Simple Linear Regression

YIK LUN, KEI


allen29@ucla.edu

Data and Correlation


# Load the cats data set from MASS and put its columns on the search path;
# later chunks refer to Bwt, Hwt and Sex without a data-frame prefix.
library(MASS)
data("cats")
attach(cats)
# Pearson correlation between body weight (Bwt) and heart weight (Hwt).
cor(x = Bwt, y = Hwt)
## [1] 0.8041274
# Test of the null hypothesis that the true correlation equals zero.
cor.test(x = Bwt, y = Hwt)
##
##  Pearson's product-moment correlation
##
## data:  Bwt and Hwt
## t = 16.119, df = 142, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.7375682 0.8552122
## sample estimates:
##       cor
## 0.8041274

# Same correlation test restricted to the female cats (Sex == "F").
cor.test(~ Bwt + Hwt, subset = Sex == "F")


##
##  Pearson's product-moment correlation
##
## data:  Bwt and Hwt
## t = 4.2152, df = 45, p-value = 0.0001186
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.2890452 0.7106399
## sample estimates:
##       cor
## 0.5320497

Simple Linear Regression


Plot

# ci.plot() is provided by the HH package, which is not loaded anywhere
# above this point.
library(HH)
# Fit the simple linear regression that the plot visualises; without this
# assignment `model1` does not exist yet and ci.plot() fails.
# NOTE(review): response and predictor are inferred from the Hwt / Bwt axis
# labels of the rendered figure -- confirm against the original notebook.
model1 <- lm(Hwt ~ Bwt, data = cats)
# Observed points, fitted line, and 95% confidence / prediction intervals.
ci.plot(model1)

95% confidence and prediction intervals for model1


x
20

15

Hwt

observed
fit
conf int
pred int
10

5
2.0

2.5

3.0

3.5

Bwt
# Set up empty axes first (type = "n") so each sex can be drawn with its
# own symbol and colour.
plot(
  x = Bwt, y = Hwt, type = "n",
  main = "Heart Weight vs. Body Weight of Cats",
  xlab = "Body Weight in kg",
  ylab = "Heart Weight in g"
)
# Females: filled red circles.
points(x = Bwt[Sex == "F"], y = Hwt[Sex == "F"], col = "red", pch = 16)
# Males: filled blue triangles.
points(x = Bwt[Sex == "M"], y = Hwt[Sex == "M"], col = "blue", pch = 17)
# Key matching the symbols and colours used above.
legend(
  "topleft",
  legend = c("Female", "Male"),
  title = "Gender",
  col = c("red", "blue"),
  pch = c(16, 17)
)

10 12 14 16 18 20

Gender
Female
Male

Heart Weight in g

Heart Weight vs. Body Weight of Cats

2.0

2.5

3.0
Body Weight in kg

Box-Cox
library(MASS)
library(car)
##
## Attaching package: 'car'
##
## The following objects are masked from 'package:HH':
##
##     logit, vif
# Attach the trees data so Volume, Height and Girth can be used bare below.
data(trees)
attach(trees)
# Baseline additive model: Volume regressed on Height and Girth.
model2 <- lm(formula = Volume ~ Height + Girth)
summary(object = model2)
##
## Call:
## lm(formula = Volume ~ Height + Girth)
##
## Residuals:
3

3.5

##     Min      1Q  Median      3Q     Max
## -6.4065 -2.6493 -0.2876  2.2003  8.4847
##
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept) -57.9877     8.6382  -6.713 2.75e-07 ***
## Height        0.3393     0.1302   2.607   0.0145 *
## Girth         4.7082     0.2643  17.816  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.882 on 28 degrees of freedom
## Multiple R-squared:  0.948,  Adjusted R-squared:  0.9442
## F-statistic:   255 on 2 and 28 DF,  p-value: < 2.2e-16

18

10

20

30

40

50

60

70

Normal QQ
31
2

31

Residuals

Residuals vs Fitted

Standardized residuals

# Draw the lm diagnostic plots for the baseline trees model on a 2 x 2 grid.
par(mfrow = c(2, 2))
plot(x = model2)

18

20

30

40

50

60

70

Fitted values

Residuals vs Leverage
31

18

10

Cook's distance
18

0.00

0.05

0.10

0.15

Leverage

# Return to a single-panel layout, then run Box-Cox on the fitted model.
# The result is kept in `b`; its `x` and `y` components are used afterwards
# to rank candidate powers.
par(mfrow = c(1, 1))
b <- boxcox(object = model2)

0.5

1.0

31

Standardized residuals

ScaleLocation
2

Theoretical Quantiles

0.0

Standardized residuals

Fitted values

0.5

0.20

30 20 10

logLikelihood

10

20

95%

# Put the Box-Cox x values next to their y values, then show the rows with
# the largest y first (head() keeps the first six).
bc <- cbind(b[["x"]], b[["y"]])
head(bc[order(-b[["y"]]), ])
##           [,1]     [,2]
## [1,] 0.3030303 25.37387
## [2,] 0.3434343 25.29521
## [3,] 0.2626263 25.26305
## [4,] 0.3838384 25.02960
## [5,] 0.2222222 24.97017
## [6,] 0.4242424 24.58202

# Estimate the Box-Cox power for the response of the trees model and print
# the estimate together with its likelihood-ratio tests.
model3 <- powerTransform(Volume ~ Height + Girth, data = trees)
summary(object = model3)
## bcPower Transformation to Normality
##
##    Est.Power Std.Err. Wald Lower Bound Wald Upper Bound
## Y1    0.3066   0.0929           0.1245           0.4887
##
## Likelihood ratio tests about transformation parameters
##                             LRT df         pval
## LR test, lambda = (0)  9.243994  1 2.362690e-03
## LR test, lambda = (1) 35.229259  1 2.930869e-09

0.15

15 18

2.0

2.5

3.0

3.5

Normal QQ
17

17

0.05

Residuals

Residuals vs Fitted

Standardized residuals

# Refit with the response raised to the estimated power 0.3066 (this
# overwrites the earlier model2), then redraw the 2 x 2 diagnostic panel.
model2 <- lm(formula = Volume^(0.3066) ~ Height + Girth)
par(mfrow = c(2, 2))
plot(x = model2)

15

18

0.0

2.5

3.0

3.5

Residuals vs Leverage
11

18

0.00

0.05

0.10

0.15

Leverage

# Re-estimate the Box-Cox power for the trees response and print the
# summary (same call as the earlier powerTransform chunk).
model3 <- powerTransform(Volume ~ Height + Girth, data = trees)
summary(object = model3)
bcPower Transformation to Normality
Y1

0.5

17

Cook's distance

Fitted values

##
##
##
##
##
##
##
##
##

0.8

151718

ScaleLocation

2.0

Theoretical Quantiles

Standardized residuals

Standardized residuals

Fitted values

Est.Power Std.Err. Wald Lower Bound Wald Upper Bound


0.3066
0.0929
0.1245
0.4887

Likelihood ratio tests about transformation parameters


LRT df
pval
LR test, lambda = (0) 9.243994 1 2.362690e-03
LR test, lambda = (1) 35.229259 1 2.930869e-09

Box-Cox on both sides


library(alr3)
##
6

0.5

0.20

## Attaching package: 'alr3'


##
## The following object is masked from 'package:HH':
##
##
residual.plots
##
## The following object is masked from 'package:MASS':
##
##
forbes

6000
4000
2000

MaxSalary

8000

# Load the salarygov data, expose its columns on the search path, and look
# at the raw relationship between Score and MaxSalary.
data(salarygov)
attach(salarygov)
plot(x = Score, y = MaxSalary)

200

400

600
Score

# Untransformed regression of MaxSalary on Score, followed by the lm
# diagnostic plots on a 2 x 2 grid.
model1 <- lm(formula = MaxSalary ~ Score)
par(mfrow = c(2, 2))
plot(x = model1)

800

1000

1000

3000

5000

375
374
283

374
283

Normal QQ

4 0

Standardized residuals

375

2000

Residuals

3000

Residuals vs Fitted

0.0

1.5

5000

375

0.000

0.005

bcPower Transformation to Normality


Est.Power Std.Err. Wald Lower Bound Wald Upper Bound
0.5481
0.0957
0.3606
0.7357

Likelihood ratio tests about transformation parameters


LRT df
pval
LR test, lambda = (0) 35.16895 1 3.023047e-09
LR test, lambda = (1) 21.09339 1 4.374339e-06

bcPower Transformation to Normality


Y1

0.010
Leverage

# Transform the predictor with the power 0.5481, then estimate the power
# for the response given that transformed predictor.
bcScore <- Score^0.5481
summary(object = powerTransform(MaxSalary ~ bcScore))
##
##
##
##
##
##
##
##
##

Est.Power Std.Err. Wald Lower Bound Wald Upper Bound


-0.1265
0.0656
-0.255
0.002

Likelihood ratio tests about transformation parameters


LRT df
pval
LR test, lambda = (0)
3.740768 1 0.05310005
LR test, lambda = (1) 270.959278 1 0.00000000

0.5

Cook's distance

# Estimate a power transformation for the predictor Score on its own and
# print the estimate with its likelihood-ratio tests.
summary(powerTransform(Score))

Score

374
283

Fitted values

##
##
##
##
##
##
##
##
##

Residuals vs Leverage
6

374
283

3000

375

ScaleLocation

1000

Theoretical Quantiles

Standardized residuals

Standardized residuals

Fitted values

0.015

0.020

# Transform the response with the power -0.1265, then regress the
# transformed response on the transformed predictor.
bcMaxSalary <- MaxSalary^(-0.1265)
model <- lm(formula = bcMaxSalary ~ bcScore)
summary(object = model)
##
## Call:
## lm(formula = bcMaxSalary ~ bcScore)
##
## Residuals:
##        Min         1Q     Median         3Q        Max
## -0.0216179 -0.0046042  0.0001454  0.0045908  0.0224980
##
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)
## (Intercept)  0.4393118  0.0014267  307.92   <2e-16 ***
## bcScore     -0.0025335  0.0000488  -51.91   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.006974 on 493 degrees of freedom
## Multiple R-squared:  0.8454, Adjusted R-squared:  0.845
## F-statistic:  2695 on 1 and 493 DF,  p-value: < 2.2e-16

136

13

375

0.34

0.36

0.38

0.40

Normal QQ
136

0 2

0.02
0.02

Residuals

Residuals vs Fitted

Standardized residuals

# Draw the lm diagnostic plots for the transformed salary model on a
# 2 x 2 grid.
par(mfrow = c(2, 2))
plot(x = model)

37513

1.0

0.34

0.36

0.38

0.40

Fitted values

Residuals vs Leverage
136

13

Standardized residuals

ScaleLocation
375136

Theoretical Quantiles

0.0

Standardized residuals

Fitted values

Cook's distance
0.000

0.005

0.010

Leverage

142

375

0.015

You might also like