R Command2

library(verification)
library(Hmisc)
library(Design)
library(rpart)
library(survival)
library(Epi)
require(MASS)
library(car)
library(graphics)
library(wle)
library(epiR)
library(stats)
library(epicalc)
library(hexbin)
library(ISwR)
library(sma)
library(geneplotter)
library(beeswarm)
library(gstat)
library ("fBasics")
library ("Rcmdr")
library(tcltk)
library(scatterplot3d)
library(amap)
library(pwr)
library(Biobase)
library(mclust)
library(ClassDiscovery)
library(scatterplot3d)
data(name of data base)
attach(name of data base)
detach(name of data base)
# Install the packages used in these notes with a single call.
# `dependencies = TRUE` is spelled out in full: the original `dep=T` relied on
# partial argument matching and on `T`, which can be reassigned.
# NOTE: the original also tried to install "stat". No such package exists
# (`stats` ships with base R and never needs installing), so it is omitted.
install.packages(
  c(
    "verification", "Hmisc", "Design", "rpart", "survival", "amap", "Epi",
    "MASS", "car", "wle", "epiR", "epicalc", "hexbin", "ISwR", "sma",
    "lattice", "beeswarm", "gstat", "Rcmdr", "pwr", "mclust",
    "ClassDiscovery", "Biobase", "scatterplot3d"
  ),
  dependencies = TRUE
)
source("http://bioinformatics.mdanderson.org/OOMPA/oompaLite.R")
oompaLite()
oompainstall(groupName="all")
source("http://www.stat.washington.edu/mclust/license.txt")

A:Principal
Components Analysis
PC <- princomp(~SET_pos_106_USE+X205225_at, cor=TRUE, data=IHC_465)
PC
Call:
princomp(formula = ~SET_pos_106_USE + X205225_at, data = IHC_465,
cor = TRUE)
Standard deviations:
Comp.1
Comp.2
1.3788400 0.3143252
2
variables and
465 observations.
unclass(loadings(PC)) #
Comp.1
Comp.2
SET_pos_106_USE -0.7071068
0.7071068
X205225_at
-0.7071068 -0.7071068
PC$sd^2 #
Comp.1
Comp.2
1.90119966 0.09880034
t( PC$sd * t( PC$loadings ) )[, drop = FALSE] #

Comp.1
Comp.2
SET_pos_106_USE -0.974987
X205225_at
0.2222615
-0.974987 -0.2222615
unclass(loadings(PC))[,c(1, 2)] #
Comp.1
Comp.2
SET_pos_106_USE -0.7071068
X205225_at
0.7071068
-0.7071068 -0.7071068
0.7
0.5
0.6
lumAScore
0.8
0.9
Scatter plot
10
20
30
40
50
ONCOTYPE_DX_RS_score
Variance
10
scatterplot(lumAScore~ONCOTYPE_DX_RS_score)
10
Average
15
smoothScatter(Average,Variance,ylim=c(0,10),xlim=c(0,15))
# Same plot with enlarged axis annotation, axis labels and sub-title.
# The argument name `cex.axis` had been split across two lines.
smoothScatter(Average, Variance, ylim = c(0, 10), xlim = c(0, 15),
              cex.axis = 2, cex.lab = 2, cex.sub = 2)
10
0.63
0.93
0.82
FOXM1_202580_x_at
0.59
0.91
0.85
MKI67_212021_s_at
0.77
0.71
TOP2A_FOXM1_NKI672
0.91
7 8 9
0.74
9.0
9.5
10
7.5
8.5
9.5
TOP2A_201292_at
11
13
8.5
GGIaverage
10
11
12
13
7.5
8.0
8.5
9.0
9.5 10.0
8.5
9.0
9.5
# Panel function for pairs(): prints the correlation of x and y in the middle
# of the panel, with the text size scaled by the absolute correlation.
#   x, y     numeric vectors supplied by pairs() for one panel
#   digits   number of significant digits shown
#   prefix   text pasted in front of the coefficient
#   cex.cor  base text size; when missing it is derived from the width of the
#            label (0.8 / strwidth)
#   ...      ignored; accepted so that extra arguments forwarded by pairs()
#            do not raise an "unused argument" error
panel.cor <- function(x, y, digits = 2, prefix = "", cex.cor, ...) {
  # Switch to unit coordinates and restore the previous ones on exit.
  # The original `on.exit(par(usr))` handed par() an unnamed numeric vector,
  # which does not identify the parameter to restore.
  old_par <- par(usr = c(0, 1, 0, 1))
  on.exit(par(old_par), add = TRUE)

  r <- cor(x, y)
  txt <- format(c(r, 0.123456789), digits = digits)[1]
  txt <- paste0(prefix, txt)
  # The original only defined the size when cex.cor was missing, so passing
  # cex.cor failed with "object 'cex' not found".
  if (missing(cex.cor)) {
    cex.cor <- 0.8 / strwidth(txt)
  }
  text(0.5, 0.5, txt, cex = cex.cor * abs(r))
}
# Scatterplot matrix: smoothed scatter plots below the diagonal and
# correlation coefficients (panel.cor) above it.
# The formula variable had been split by a line wrap; it is rejoined here as
# TOP2A_FOXM1_NKI67.
# NOTE(review): a figure label elsewhere in these notes reads
# "TOP2A_FOXM1_NKI672" -- confirm the exact column name.
# The tokens X203440_at, X202274_at, X201426_s_at, X201131_s_at and
# X201015_s_at that were interleaved inside `data=` are figure axis labels,
# not arguments.
pairs(~ TOP2A_201292_at + FOXM1_202580_x_at + MKI67_212021_s_at +
        TOP2A_FOXM1_NKI67 + GGIaverage,
      lower.panel = panel.smooth,
      upper.panel = panel.cor,
      data = Mainz_ERpos_grade2)
X201015_s_at
X201131_s_at
X201426_s_at
X202274_at
X203440_at
data(all)
# name of data set is all
attach(all)
# Image plot of the Spearman correlation matrix
# (plot.cor() is presumably the one from package sma -- confirm).
# The tokens "M106" and "M107" that were interleaved in this call are
# row/column labels of the printed matrix, not arguments.
plot.cor(x = cor(all, method = "spearman"), new = FALSE,
         labels = names(all), zlim = c(-1.0, 1.0))
M108
M111
M113
M116
M117
M106 1.0000000 0.7818627 0.5588235 0.6348039 0.6004902 0.7426471

0.5882353
M107 0.7818627 1.0000000 0.8161765 0.7720588 0.6666667 0.6936275
0.8382353
M108 0.5588235 0.8161765 1.0000000 0.7058824 0.7769608 0.7083333
0.9117647
M111 0.6348039 0.7720588 0.7058824 1.0000000 0.8627451 0.4607843
0.7867647
M113 0.6004902 0.6666667 0.7769608 0.8627451 1.0000000 0.5490196
0.7573529
M116 0.7426471 0.6936275 0.7083333 0.4607843 0.5490196 1.0000000

0.6838235
M117 0.5882353 0.8382353 0.9117647 0.7867647 0.7573529 0.6838235
1.0000000
M120 0.4460784 0.5563725 0.6936275 0.5784314 0.7401961 0.6078431
0.6274510
M121 0.5906863 0.5441176 0.7034314 0.8186275 0.8333333 0.5318627
0.6323529
M120
M121
M106 0.4460784 0.5906863

M107 0.5563725 0.5441176
M108 0.6936275 0.7034314
M111 0.5784314 0.8186275
M113 0.7401961 0.8333333
M116 0.6078431 0.5318627
cor( all, method = "spearman" )
Courier New
Non-parametric
Parametric
Wilcoxon
T-test
Kruskal wallis
ANOVA
Fisher
X2
Spearman
Pearson
Wilcoxon
t.test
Age- continuous value

group-categorical (0,1,2,,,)
data(aa)
attach(aa)
t.test(Age~group,var.equal=T,data=aa)
p-value = 0.4842
Wilcox (Boxpolt :Kruskal-Wallis :Wilcox)
wilcox.test(Age~group,var.equal=T,data=aa)
Wilcoxon rank sum test with continuity correction
p-value = 0.6755
Welch
data(aa)
t.test(Age~group,var.equal=F,data=aa)
Welch Two Sample t-test
p-value = 0.2998
Fisher Exact test

3*2
70 30
45
x<-matrix(c(70,30,
45,5,
6,3),nrow=3,ncol=2,byrow=T)
#nrow :
Number of ROW ncol:

of column (=
Number

)
2*3
42, 7,
36, 3,
x<-matrix(c(42,7,1,36,3,0),nrow=2,ncol=3,byrow=T)
fisher.test(x)
X2
70 30
45 2 X2 Fisher
Fisher
x<-matrix(c(70,30,45,5),2,byrow=T)
fisher.test(x)
Fisher's Exact Test for Count Data
p-value = 0.007264
Pearson's Chi-squared test
x<-matrix(c(70,30,45,5),2,byrow=T)
chisq.test(x)
Pearson's Chi-squared test with Yates' continuity correction
X-squared = 6.3773, df = 1, p-value = 0.01156
x<-matrix(c(7,6,10,14,9,18),2,byrow=T)
chisq.test(x)
data:
X-squared = 0.17, df = 2, p-value = 0.9185
3X3
# Build the 3 x 4 table row by row. The original read `x<matrix(...)`, which
# is a comparison (`x < matrix(...)`), not an assignment.
x <- matrix(c(652, 1537, 598, 242,
              36, 46, 38, 21,
              218, 327, 106, 67),
            nrow = 3, byrow = TRUE)
colnames(x) <- c("0", "1-150", "151-300", ">300")
rownames(x) <- c("Married", "Prev.married", "Single")
x
0 1-150 151-300 >300
Married
1537
598
242
36
46
38
21
218
327
106
67
Prev.married
652
Single
chisq.test(x)

data:
X-squared = 51.6556, df = 6, p-value = 2.187e-09
Cochran-Mantel-Haenszel Chi-Squared Test for Count Data v2.11.0

library(epicalc)
cc(cancer, snp)
Odds ratio from prospective/X-sectional study
Odds of outcome
OR = 2
1/2
95% CI = 0.72 , 5.81
1/4
non-exposed
exposed
Exposure category
cancer
snp
can Non Total
neg
24
25
49
pos
10
21
31
Total
34
46
80
OR =
95% CI = 0.72 5.81

Chi-squared = 2.17 ,
1 d.f. , P value = 0.14
Fisher's exact test (2-sided) P value = 0.168

mhor(cancer, snp,rec)
Stratified analysis by
rec
OR lower lim. upper lim. P value

rec non
2.68
0.749
11.22
0.105
rec rec
1.56
0.166
16.43
1.000
M-H combined 2.37
0.873
6.42
0.091
M-H Chi2(1) = 2.86 , P value = 0.091

Homogeneity test, chi-squared 1 d.f. = 0.23 , P value = 0.633
Stratified prospective/X-sectional analysis
Odds of outcome
recnon: OR = 2.68 (0.75, 11.22)

recrec: OR = 1.56 (0.17, 16.43)
1/2
MH-OR = 2.37 (0.87, 6.42)

homogeneity test P value = 0.633
1/4
Non-exposed
Exposed
Outcome= cancer , Exposure= snp
x<-matrix(c(70,30,45,5,10,13),2,byrow=T) #2 : Number of ROW (=yoko

retsu)
mantelhaen.test(x, y = NULL, z = NULL,

alternative = c("two.sided", "less", "greater"),
correct = TRUE, exact = FALSE, conf.level = 0.95)
x either a 3-dimensional contingency table in array form where each dimension
is at least 2 and the last dimension corresponds to the strata, or a factor object
with at least 2 levels. y a factor object with at least 2 levels; ignored if x is an
array. z a factor object with at least 2 levels identifying to which stratum the
corresponding elements in x and y belong; ignored if x is an array. alternative
indicates the alternative hypothesis and must be one of "two.sided", "greater"
or "less". You can specify just the initial letter. Only used in the 2 by 2 by K
case. correct a logical indicating whether to apply continuity correction when
computing the test statistic. Only used in the 2 by 2 by K case. exact a logical
indicating whether the Mantel-Haenszel test or the exact conditional test (given
the strata margins) should be computed. Only used in the 2 by 2 by K case.
conf.level confidence level for the returned confidence interval. Only used in
the 2 by 2 by K case.
Anova
data(hako)
anova(lm(Age~group,data=hako) )
group
308.2
154.1
1.1583 0.3303 (P )
Kruskal-Wallis (Boxpolt :Kruskal-Wallis :Wilcox)

data(hako)
kruskal.test(Age~group,data=hako)
Kruskal-Wallis rank sum test
data:
Age by group
Kruskal-Wallis chi-squared = 2.3641, df = 2, p-value = 0.3066
3e-05
2e-05
Density
4e-05
5e-05
Centroid TN according to subgroup definition

by gene expression - MDA dataset
0e+00
1e-05
ERnegativeHer2negative (by gene)

Others (by gene)
-1e+05
-5e+04
0e+00
5e+04
Centroid TN
Density
Data frame: mda
bb, ERnegHer2neg: categorical value
dd, continuous value
Centroid_TN.TN<-mda$dd[mda$bb=="ERnegHer2neg"]
bb=="ERnegHer2neg"bb ERnegHer2neg categorical

mda: data dd: continuous value
Centroid_TN.notTN<-mda$dd[mda$bb!="ERnegHer2neg"]
# Density of the score in the "ERnegHer2neg" group (blue), with the remaining
# samples overlaid (red). lwd sets the line width.
# `xlab` and the legend text had been broken across wrapped lines.
plot(density(Centroid_TN.TN), col = 4, lwd = 3, main = "Kinome,,,,dataset",
     xlab = "Kinome score TN", xlim = c(-100000, 60000))
lines(density(Centroid_TN.notTN), lwd = 3, col = 2)
# locator(1) waits for one mouse click and places the legend there.
legend(locator(1),
       legend = c("ERnegativeHer2negative (by gene)", "Others (by gene)"),
       lty = c(1, 1), lwd = c(3, 3), col = c("blue", "red"))

x <- mda233$AR_211110_s_at[mda233$ERbyGENE == "1ERpos"]
y <- mda233$AR_211110_s_at[mda233$ERbyGENE == "2ERneg"]
# Dashed density for the "2ERneg" group; lwd sets the line width.
# The xlab string and the trailing comment had been broken across lines.
plot(density(y), lwd = 3, xlab = "Log2 converted gene expression",
     xlim = c(0, 16), lty = 2, main = "", ylim = c(0, 0.6))
# Solid density for the "1ERpos" group on the same axes.
lines(density(x), lwd = 3, lty = 1)
0.3
0.0
0.1
0.2
Density
0.4
0.5
0.6
density.default(x = AKT1)
10
N = 286 Bandwidth = 0.1855
plot(density(AKT1))
abline(v=10) #Tate sen
abline(h=1.3) #Yoko sen
11
12
Histogram of Ozone
10
20
Frequency
20
10
Frequency
30
30
Histogram of Wind
10
15
20
Wind
50
100 150
Ozone
:
Data frame*: airquality
Wind: continuous value
Ozone: continuous value
attach(airquality)
shapiro.test(Wind)
W = 0.9858, p-value = 0.1178
shapiro.test(Ozone)
W = 0.8787, p-value = 2.790e-08
Wind Ozone
Histgram
layout(t(1:2))
hist(Wind)
main="Kinome,,,,dataset",xlab="Kinome
100, 600)
hist(Ozone)
10
15
20
Wind
3
# Three panels side by side. The original `t(1:2:3)` only produced 1:3 by
# accident: `1:2:3` is `(1:2):3`, which warns and uses the first element.
layout(t(1:3))
hist(Wind)
hist(Wind)
hist(Wind)
20
10
0
0
10
Frequency
20
Frequency
30
20
0
10
Frequency
Histogram of Ozone
30
Histogram of Ozone
30
Histogram of Wind
50
100
Ozone
150
50
100
Ozone
150
TN",xlim=c(-
10
8
2
Log2 converted data
12
14
Stripchart
ERposHER2negIBC_nonrec
HER2posIBC_nonrec
HER2posIBC_rec
TNIBC_nonrec
TNIBC_rec
data(IBC)
attach(IBC)
mHT <- tapply(TIG1, IBC_rec_vs_nonrec, mean) # group means
sHT <- tapply(TIG1, IBC_rec_vs_nonrec, sd)   # group standard deviations
IS <- c(1, 2, 3, 4, 5) + 0.15 # x positions of the 5 groups, shifted right by 0.15
# The ylab string and the comment on arrows() had been broken across lines.
stripchart(TIG1 ~ IBC_rec_vs_nonrec, method = "jitter", vertical = TRUE,
           ylab = "Log2 converted data")
points(IS, mHT, pch = 1) # plot the means
# Bars from mean - SD to mean + SD with flat caps (code = 3, angle = 90).
arrows(IS, mHT - sHT, IS, mHT + sHT, code = 3, angle = 90, length = .1)
10000
9500
9000
PUM1_201166_s_atROW
8500
8000
1Control_SUM149
2HDAC_SUM149
3Control_SUM190
4HDAC_SUM190
stripchart(PUM1_201166_s_atROW~group,method="jitter",vert=T,pch=
15, cex = 1.5)
Order rank
Data frame attach weht attach(weht)
# R is case-sensitive: the original `Library(survival)` is not a function.
library(survival)
weht <- tk[order(tk$IKS), ]
# tk: data set, sorted here by increasing IKS
attach(weht)
# col = unclass(Molecular) + 1 colours each point by the integer code of its
# Molecular level plus one; pch = 16 draws filled circles.
plot(IKS, col = unclass(Molecular) + 1, pch = 16,
     xlab = "Rank ordered by gene expression", ylab = "Immune Kinome Score")
title("TRANSBIG data sets")
# NOTE(review): the points use palette entries 2, 3, 4; confirm that the
# legend colours below are listed in the same order as the levels of Molecular.
legend(locator(1),
       legend = c("ERnegHER2neg", "ERposHER2neg", "HER2pos"),
       lwd = c(3, 3, 3), col = c("blue", "red", "green"))

detach(weht)
Logistic Regression Model

glm, lrm glm
Logistic Regression Model
Step2 95%
p
To get the confidence intervals and odds ratios regardless of the model, use the
following commands:
# Full model. The original read `x<glm(...)`, which compares x with the
# fitted model instead of assigning it.
x <- glm(relapse ~ HER2byGENE + ERStatus + IKS3Label,
         family = binomial, data = wk)
summary(x)
exp(coef(x))    # odds ratios
exp(confint(x)) # 95% confidence intervals of the odds ratios

# Same model followed by stepwise selection with step().
x <- glm(relapse ~ HER2byGENE + ERStatus + IKS3Label,
         family = binomial, data = wk)
summary(x)
summary(x2 <- step(x)) # p values of the model kept by the stepwise search
exp(coef(x2))          # odds ratios
exp(confint(x2))       # 95% confidence intervals of the odds ratios
BR3<-cut(PTK6, c(0, 6,15))
#divide to 2 group
x<-glm(pCR~BR3,family=binomial,data=md233,
subset=(Molecular=="ERposHER2neg"))
#divide to sub group
Logistic Regression Model 2

relapse: event (1,0),
factor : categorical value continuous
value P-value
x<-lrm(relapse~ERstatusBYgene+MKS12,x=TRUE, y=TRUE,data=wk)
x
p < 0.05
fastbw(x) #
Logistic Regression Model 3

require(MASS)
data(birthwt)
attach(birthwt)
low<-factor(low)
race<-factor(race,labels=c("white","black","other"))
print(table(low,race))
race
low white black other
0
73
15
42
23
11
25
smoke<-(smoke>0);print(table(low,smoke))
smoke
low FALSE TRUE
0
86
44
29
30
bw<-data.frame(low,age,lwt,race,smoke,ptl,ht,ui,ftv)
#selection of
event+factor
detach(birthwt)
print(summary(res<-glm(low~.,family=binomial,data=bw)))
#low~age+lwt+,,
Call:
glm(formula = low ~ ., family = binomial, data = bw)
Deviance Residuals:
Min
1Q
Median
3Q
Max
-1.8946
-0.8212
-0.5316
0.9818
2.2125
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept)
0.480623
1.196888
0.402
0.68801
age
-0.029549
0.037031
-0.798
0.42489
lwt
-0.015424
0.006919
-2.229
0.02580 *
1.272260
0.527357
2.413
0.01584 *
raceblack
raceother
0.880496
0.440778
1.998
0.04576 *
smokeTRUE
0.938846
0.402147
2.335
0.01957 *
ptl
0.543337
0.345403
1.573
0.11571
ht
1.863303
0.697533
2.671
0.00756 **
ui
0.767648
0.459318
1.671
0.09467 .
ftv
0.065302
0.172394
0.379
0.70484
--Signif. codes:
0 *** 0.001 ** 0.01 * 0.05 . 0.1 1
(Dispersion parameter for binomial family taken to be 1)

Null deviance: 234.67
on 188
degrees of freedom
Residual deviance: 201.28
on 179
degrees of freedom
AIC: 221.28
Number of Fisher Scoring iterations: 4
# Nagelkerke's pseudo R-squared for a fitted glm.
#   rr  fitted model providing `deviance` and `null.deviance`
#   n   number of observations
# R2 = (1 - exp((D - D0) / n)) / (1 - exp(-D0 / n)), where D is the residual
# deviance and D0 the null deviance. The original had a misplaced parenthesis
# and a missing minus sign, which produced values such as -1e+87.
NagelkerkeR2 <- function(rr, n) {
  (1 - exp((rr$deviance - rr$null.deviance) / n)) /
    (1 - exp(-rr$null.deviance / n))
}
print(NagelkerkeR2(res,nrow(bw)))
[1] -1.060980e+87
print(exp(coef(res)))
(Intercept)
age
smokeTRUE
ptl
1.6170819
2.5570281
lwt
ht
0.9708833
1.7217428
print(exp(confint(res)))
Waiting for profiling to be done...
2.5 %
ui
0.9846941
6.4449886
97.5 %
(Intercept) 0.1586248 17.7689406

age
0.9014649
1.0429731
lwt
0.9706547
0.9975382
raceblack
1.2733620 10.2378101
raceother
1.0269690
5.8422688
smokeTRUE
1.1753715
5.7425658
ptl
0.8838560
3.4765158
ht
1.7030020 27.6935195
raceblack
ftv
3.5689085
2.1546928
raceother
1.0674812
2.4120956
ui
0.8662663
5.3169672
ftv
0.7534567
1.4900589
print(summary(res2<-step(res))) #impotant!
Start:
AIC=221.28
low ~ age + lwt + race + smoke + ptl + ht + ui + ftv

Df Deviance
AIC
- ftv
201.43 219.43
- age
201.93 219.93
<none>
201.28 221.28
- ptl
203.83 221.83
- ui
204.03 222.03
- race
208.75 224.75
- lwt
206.80 224.80
- smoke
206.91 224.91
- ht
208.81 226.81
Step:
AIC=219.43
low ~ age + lwt + race + smoke + ptl + ht + ui

Df Deviance
- age
<none>
AIC
201.99 217.99
201.43 219.43
- ptl
203.95 219.95
- ui
204.11 220.11
- race
208.77 222.77
- lwt
206.81 222.81
- smoke
206.92 222.92
- ht
208.81 224.81
Step:
AIC=217.99
low ~ lwt + race + smoke + ptl + ht + ui

Df Deviance
<none>
- ptl
AIC
201.99 217.99
1
204.22 218.22
- ui
204.90 218.90
- smoke
207.73 221.73
- lwt
208.11 222.11
- race
210.31 222.31
- ht
209.46 223.46
Call:
glm(formula = low ~ lwt + race + smoke + ptl + ht + ui, family =
binomial,
data = bw)
Deviance Residuals:
Min
1Q
Median
3Q
Max
-1.9049
-0.8124
-0.5241
0.9483
2.1812
Coefficients:
(Intercept) -0.086550
0.951760
-0.091
0.92754
lwt
-0.015905
0.006855
-2.320
0.02033 *
raceblack
1.325719
0.522243
2.539
0.01113 *
raceother
0.897078
0.433881
2.068
0.03868 *
smokeTRUE
0.938727
0.398717
2.354
0.01855 *
ptl
0.503215
0.341231
1.475
0.14029
ht
1.855042
0.695118
2.669
0.00762 **
ui
0.785698
0.456441
1.721
0.08519 .
--Signif. codes:
0 *** 0.001 ** 0.01 * 0.05 . 0.1 1

on 188
degrees of freedom
on 181
degrees of freedom
AIC: 217.99
print(NagelkerkeR2(res2,nrow(bw))) #
[1] -2.138245e+87
print(exp(coef(res2)))
#impotant! To obtain odds ratio
(Intercept)
lwt
ptl
ht
0.9170901
1.6540303
raceblack
raceother
smokeTRUE
2.4524265
2.5567241
ui
0.9842205
6.3919640
3.7648926
2.1939368
print(exp(confint(res2))) #95% coeficient

2.5 %
97.5 %
(Intercept) 0.1483796
6.2982590
lwt
0.9702639
0.9968395
raceblack
1.3555550 10.6805086
raceother
1.0617504
5.8773934
smokeTRUE
1.1846704
5.7092626
ptl
0.8552098
3.3045306
ht
1.6936142 27.2645876
ui
0.8879894
5.3881165
>
95 P
3.765
1.355
10.68
0.011
> library(survival)
Loading required package: splines
> library(MASS)
> data(wk)
> attach(wk)
>
x<-
glm(relapse~HER2byGENE+ERStatus+IKS3Label,family=binomial,data=wk)
> summary(x)
Call:
glm(formula = relapse ~ HER2byGENE + ERStatus + IKS3Label, family =
binomial,
data = wk)
Deviance Residuals:
Min
1Q
Median
3Q
Max
-1.0339
-0.9881
-0.8733
1.3427
1.5344
Coefficients:
(Intercept)
-0.347302
0.352044
-0.987
0.324
HER2byGENEHER2pos
-0.041258
0.327125
-0.126
0.900
ERStatusER+
-0.009563
0.298193
-0.032
0.974
IKS3Label2Intermediate -0.106124
0.302806
-0.350
0.726
IKS3Label3High
0.323853
-1.268
0.205
-0.410629

on 285
degrees of freedom
on 281
degrees of freedom
AIC: 385.11
> summary(x2<-step(x))
Start:
AIC=385.11
relapse ~ HER2byGENE + ERStatus + IKS3Label

Df Deviance
AIC
- IKS3Label
376.87 382.87
- ERStatus
375.12 383.12
- HER2byGENE
375.13 383.13
<none>
Step:
375.11 385.11
AIC=382.87
relapse ~ HER2byGENE + ERStatus

Df Deviance
AIC
- HER2byGENE
376.93 380.93
- ERStatus
377.03 381.03
<none>
Step:
376.87 382.87
AIC=380.93
relapse ~ ERStatus
Df Deviance
- ERStatus
AIC
377.11 379.11
<none>
Step:
376.93 380.93
AIC=379.11
relapse ~ 1
Call:
glm(formula = relapse ~ 1, family = binomial, data = wk)
Deviance Residuals:
Min
1Q
Median
3Q
Max
-0.9623
-0.9623
-0.9623
1.4089
1.4089
Coefficients:
(Intercept)
-0.5295
0.1224
-4.325 1.53e-05 *** If we get the
significance, we get the data

--Signif. codes:
0 *** 0.001 ** 0.01 * 0.05 . 0.1 1

on 285
degrees of freedom
on 285
degrees of freedom
AIC: 379.11
> exp(coef(x2))
(Intercept)
0.5888889
> exp(confint(x2))
2.5 %
97.5 %
0.4619652 0.7469545
>
Last modified: Nov 17, 2004
R glm
R
glm(family=binomial)
y ~ x1+x2+x4
yx1x2x3
lr.data lr.data
x1, x2
data <- read.table("lr.data", header=TRUE)

result <- glm(y ~ x1+x2, data, family=binomial)
result
summary(result)
coefficients(result)
residuals(result)
result0 <- glm(y ~ 1, data, family=binomial)

anova(result0, result, test="Chisq")
> data <- read.table("lr.data", header=TRUE)

> result <- glm(y ~ x1+x2, data, family=binomial)
> result
Call:
glm(formula = y ~ x1 + x2, family = binomial, data = data)
Coefficients:
(Intercept)
x1
x2
-5.645581
0.008297
0.011386
Degrees of Freedom: 97 Total (i.e. Null);

Null Deviance:
95 Residual
76.71
Residual Deviance: 72.18
AIC: 78.18
> summary(result)
Call:
glm(formula = y ~ x1 + x2, family = binomial, data = data)
Deviance Residuals:
Min
1Q
Median
3Q
Max
-1.4350
-0.5413
-0.4625
-0.3801
2.2197
Coefficients:
(Intercept) -5.645581
3.048239
-1.852
0.0640 .
x1
0.008297
0.021208
0.391
0.6956
x2
0.011386
0.005740
1.984
0.0473 *
--Signif. codes:
0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1

on 97
degrees of freedom
on 95
degrees of freedom
AIC: 78.184
> coefficients(result)
(Intercept)
x1
x2
-5.645580693
0.008297108
0.011386484
> residuals(result)
1
2.0972899
1.5625411
2.1967232
2.1767753
1.8665989
2.0548909
10
11
12
13
1.6885385 -0.3601806 -0.3990558
1.9670456
1.9349012
2.2197067
18
19
20
7
-0.4170057
8
14
1.7065171
15
21
16
17
1.9215695
1.4563358 -0.5168576 -0.5069061 -0.3865767 -0.7133382
-0.3640321
> result0 <- glm(y ~ 1, data, family=binomial)

> anova(result0, result, test="Chisq")
Analysis of Deviance Table
Model 1: y ~ 1
Model 2: y ~ x1 + x2
Resid. Df Resid. Dev Df Deviance P(>|Chi|)
1
97
76.714
95
72.184
4.530
0.104
0.8
1.0
ROC-LDLT
0.6
0.4
Sens: 67.3%
Spec: 70.7%
PV+: 37.9%
PV-: 89.0%
Variable
est. (s.e.)
(Intercept) -2.992 (0.403)
test 0.000 (0.000)
0.2
Sensitivity
x = 6778.926
0.0
Model: y ~ x
Area under the curve: 0.733
0.0
0.2
0.4
0.6
0.8
1.0
1-Specificity
ROC ROC
Event 10
Data frame: mdk2
MKS: continuous value
data(mdk2)
x<-(MKS)
y<-c(rep(1,49),rep(0,184))
#event 1:N=49, event 0:N-184
ROC(x,y,plot="ROC",main="ROC")
gs5: data sets gns : event 0 or 1, te : continuous value

AUC
roc.area(gs5$gns, gs5$te)
CI 95% AUC
# 95% confidence interval of the AUC from Hmisc::rcorr.cens().
# rc[2] is Dxy and rc[3] is the standard deviation of Dxy; since
# AUC = 0.5 + Dxy / 2, the limits are 0.5 + 0.5 * (Dxy -/+ 1.96 * SD).
# The original divided the SD by 2 a second time, which made the interval
# half as wide as it should be.
rc <- rcorr.cens(gs5$te, gs5$gns)
rc
aucbas <- 0.5 + 0.5 * (rc[2] - 1.96 * rc[3])  # lower limit
auchaut <- 0.5 + 0.5 * (rc[2] + 1.96 * rc[3]) # upper limit
aucbas
auchaut
# Ex: CI95 0.6-0.9
English version
# AUC and its 95% confidence interval for GGI_128 as a predictor of relapse.
roc.area(JBI_259$relapse, JBI_259$GGI_128)
# rc[2] is Dxy and rc[3] is the standard deviation of Dxy; since
# AUC = 0.5 + Dxy / 2, the limits are 0.5 + 0.5 * (Dxy -/+ 1.96 * SD).
# The original divided the SD by 2 a second time, which made the interval
# half as wide as it should be.
rc <- rcorr.cens(JBI_259$GGI_128, JBI_259$relapse)
rc
aucbas <- 0.5 + 0.5 * (rc[2] - 1.96 * rc[3])  # lower limit
auchaut <- 0.5 + 0.5 * (rc[2] + 1.96 * rc[3]) # upper limit
aucbas
auchaut
# Ex: CI95 0.6-0.9
15000
10000
0
5000
ROW data
20000
25000
Kinoma score ERpos/Her2neg-A
Cell19
Cell23
Cell51
JBI
Transbig
Wang
BOXPLOT (Boxpolt :Kruskal-Wallis :Wilcox)

# Strings in these calls had been broken across wrapped lines; they are
# rejoined here.
boxplot(bb ~ aa, data = erc, col = c("blue", "red", "green"),
        main = "Kinoma score in Cell line 19", ylab = "ROW data",
        ylim = c(0, 1310000))
# The original was missing the closing quote after "HER2pos".
boxplot(dd ~ bb, names = c("HER2neg", "HER2pos"), col = c("red", "blue"))
title("Centroid HER2pos and molecular subgroup\nJBI dataset",
      ylab = "Centroid HER2pos")
boxplot(bb ~ aa, data = ea, main = "Kinoma score ERpos/Her2neg-A",
        ylab = "ROW data", ylim = c(0, 25000),
        names = c("Cell19", "Cell23", "Cell51", "JBI", "Transbig", "Wang"),
        col = rainbow(10))
# locator(1) waits for one mouse click and places the text there.
legend(locator(1), legend = c("Kruskal-Wallis p=0.0579"))
14
12
10
8
6
4
10
11
12
# One box per sample vector. The variable names had been split across wrapped
# lines (e.g. "SUM149." / "1uM.2"), which is a syntax error.
boxplot(SUM149.0uM.1, SUM149.0uM.2, SUM149.0uM.3,
        SUM149.1uM.1, SUM149.1uM.2, SUM149.1uM.3,
        SUM190.0uM.1, SUM190.0uM.2, SUM190.0uM.3,
        SUM190.1uM.1, SUM190.1uM.2, SUM190.1uM.3)
COX (Cox proportional hazards model)

data frame: wk, RFSmonths: ,
relapse: event (1,0),subset:
Univariate analysis
# R is case-sensitive: the original `Library(survival)` is not a function.
library(survival)
# Cox model with a single covariate, restricted to rows where
# Molecular == "TN"; ties handled with the Breslow method.
# The original read `x<coxph(...)`, a comparison rather than an assignment.
x <- coxph(Surv(X120_RFSmonths, relapse) ~ IKS3Label, method = "breslow",
           data = wk, subset = (Molecular == "TN"))
summary(x)
Multivariate analysis
# R is case-sensitive: the original `Library(survival)` is not a function.
library(survival)
# Cox model with two covariates, restricted to rows where
# ERbyGENE == "ERpos"; ties handled with the Breslow method.
# The original read `x<coxph(...)` and had `data=` split across two lines.
x <- coxph(Surv(X120_RFSmonths, relapse) ~ MKS100 + IKS50, method = "breslow",
           data = tk, subset = (ERbyGENE == "ERpos"))
summary(x)
Call:
coxph(formula = Surv(time, event) ~ Pt, data = KM3)
n= 136
coef exp(coef) se(coef)
Pt 1.08
0.409 2.64 0.0084 P
2.94
exp(coef) exp(-coef) lower .95 upper .95

Pt
2.94
Rsquare= 0.048
0.34
1.32
6.56
(max possible= 0.802 )
Likelihood ratio test= 6.64
on 1 df,
p=0.00996 large number
means good result

Wald test
= 6.95
on 1 df,
p=0.00838
Score (logrank) test = 7.64
on 1 df,
p=0.0057
BR3<-cut(LYN, c(0, 9.5,15))
x<-coxph(Surv(X120_RFSmonths,
relapse)~BR3,method="breslow",data=WGG,
subset=(Molecular=="ERposHER2neg"))
summary(x)
Continuous value categorical value
x<-c(MKS/100) #100 Colum
Survival analysis: Multivariate analysis (stepwise)

COX model
# Full Cox model with all candidate covariates (Breslow handling of ties).
# The original read `x<coxph(...)` and had `method=` split across two lines.
x <- coxph(Surv(OSt5y, Ose5y) ~ NKI70 + R76 + DX + MKSLabel2 + IKS2Label +
             TOP2Lavel2 + KI67Label2 + ER + prpos + her + TLabel2 + NLabel2 +
             gradeLabel2 + ageLabel2,
           method = "breslow", data = marker)
summary(x)
# Stepwise selection. The original used x2 below without ever creating it;
# step() is the call used for the stepwise glm earlier in these notes.
summary(x2 <- step(x)) # p values of the selected model
exp(coef(x2))          # hazard ratios (exp of the Cox coefficients)
exp(confint(x2))       # 95% confidence intervals of the hazard ratios
Time: RFSmonths, event: relapse, group: IKS4Label, subset
library(survival)
x <- survfit(Surv(X120_RFSmonths, relapse) ~ IKS4Label, conf.int = .95,
             subset = (Molecular == "ERposHER2neg"))
# The arguments of this call had been shuffled by line wrapping (ylab sat
# inside legend.text). lty sets the line type of each curve.
# NOTE(review): legend.text is taken from the original call; confirm that the
# installed survival version still accepts it.
plot(x,
     xlab = "Months",
     ylab = "Distant Event Free Survival",
     legend.text = c("Low", "Low-Intermediate", "High-Intermediate", "High"),
     lty = c(1, 2, 3, 4))

title("DEFS according to IKS sub group ERpos/Her2neg Wang dataset")
# locator(1) waits for one mouse click and places the text there.
legend(locator(1), legend = c("Logrank test p=0.0579"))
0.6
0.4
Logrank test p=0.0579
0.2
Distant Event Free Survival
0.8
1.0
DEFS according to IKS sub group ERpos/Her2neg Wang dataset
Low
Low-Intermediate
0.0
High-Intermediate
High
0
50
100
150
Months
KM
x <- survfit(Surv(RFSmonths, relapse) ~ IKS4Label, conf.int = .95,
             subset = (Molecular == "ERposHER2neg"))

# The arguments of this call had been shuffled and partly lost by line
# wrapping. The start of `legend.text = c("Low", ...)` is reconstructed from
# the remaining fragments ("Low-Intermediate", "High-Intermediate", "High").
# NOTE(review): confirm the group labels, and that the installed survival
# version still accepts legend.text.
plot(x,
     xlab = "Months",
     ylab = "Distant Event Free Survival",
     legend.text = c("Low", "Low-Intermediate", "High-Intermediate", "High"),
     lty = c(2, 4, 6, 8),
     col = c("red", "green", "blue", "orange"))
title("DEFS according to IKS sub group ERpos/Her2neg Wang dataset")
# locator(1) waits for one mouse click and places the text there.
legend(locator(1), legend = c("Logrank test p=0.0579"))
0.6
0.4
0.0
0.2
Distant Event Free Survival
0.8
1.0
No.
28
23
18
14
13
11
11
10
1Low
28
26
24
22
22
21
21
20
16
10
2High
12
24
36
48
60
72
84
96
108
120
Months
No. of Patients at risk
x <- survfit(Surv(X120_RFSmonths, relapse) ~ TIG1TNLabel2,
             conf.int = .95, subset = (Molecular == "TN"), se.fit = FALSE)

# Kaplan-Meier curves with the number at risk printed every 12 time units.
# The original call had been scrambled by line wrapping: se.fit and xlab sat
# after the closing parenthesis, and conf.int was given twice (FALSE and
# TRUE), which R rejects. conf.int = FALSE is kept because it agrees with
# conf.type = "none" and se.fit = FALSE.
# NOTE(review): confirm that no confidence bands are wanted.
survplot(x, type = "kaplan-meier", pr = FALSE, conf.int = FALSE,
         time.inc = 12, label.curves = FALSE, lty = c(1, 5), n.risk = TRUE,
         col = c("red", "blue"), conf.type = c("none"), lwd = c(2, 2),
         se.fit = FALSE, xlab = "Months", ylab = "Relapse Free Survival")
1.0
0.8
0.6
0.4
Logrank test p=0.0579

takayuki
0.0
0.2
Survival Probability
360
720
1080
1440
1800
2160
2520
Days
library(verification)
library(Hmisc)
library(Design)
library(survival)
data(KM3)
attach(KM3)
d <- datadist(KM3)
options(datadist="d")
srv<-Surv(time,event)
fit<-survfit(srv~Pt, data=KM3)
fit
Call: survfit(formula = srv ~ Pt, data = KM3)
n events median 0.95LCL 0.95UCL
0 97
12
Inf
2879
Inf
1 39
12
Inf
1983
Inf
2880
3240
3600
survplot(fit,type="kaplanmeier",pr=TRUE,conf.int=TRUE,xlim=c(0,3600),time.inc=360)
legend(locator(1),
legend=c("Logrank
test
p=0.0579","takayuki"),lty=c(3,1)) #lty line
summary(fit)
Call: survfit(formula = srv ~ GGIaverageLabel, data = JBI_grade2)
4 observations deleted due to missingness
1Low
time n.risk n.event survival std.err lower 95% CI upper 95% CI
13.0
64
0.969
0.0217
0.881
0.992
18.0
62
0.953
0.0264
0.862
0.985
27.5
60
0.937
0.0304
0.841
0.976
30.5
57
0.921
0.0340
0.820
0.966
40.7
55
0.904
0.0373
0.799
0.956
45.3
53
0.887
0.0403
0.777
0.945
45.4
52
0.870
0.0430
0.756
0.933
53.0
49
0.852
0.0456
0.735
0.920
84.0
36
0.829
0.0501
0.702
0.905
98.8
27
0.798
0.0569
0.657
0.885
2High
time n.risk n.event survival std.err lower 95% CI upper 95% CI
11.0
61
0.984
0.0163
0.889
0.998
14.1
60
0.967
0.0228
0.875
0.992
14.6
59
0.951
0.0277
0.855
0.984
16.2
58
0.934
0.0317
0.835
0.975
23.6
56
0.918
0.0352
0.814
0.965
25.2
54
0.901
0.0385
0.792
0.954
29.6
53
0.884
0.0413
0.771
0.943
37.2
50
0.866
0.0441
0.750
0.931
37.3
49
0.848
0.0466
0.729
0.918
41.0
48
0.831
0.0489
0.708
0.905
42.5
47
0.813
0.0509
0.688
0.892
43.6
46
0.795
0.0528
0.667
0.878
44.6
45
0.778
0.0545
0.648
0.865
53.0
40
0.758
0.0565
0.626
0.849
78.0
24
0.695
0.0672
0.542
0.806
87.0
20
0.660
0.0723
0.499
0.781
Scatter plot
pairs(~mpg+disp+drat+wt,data=mtcars,
main="Simple Scatterplot Matrix")
Metformin data set
Counts
1602
1502
1402
1302
1202
1102
1002
902
802
701
601
501
401
301
201
101
1
Variance
15
10
0
2
10
12
14
Average
library(hexbin)
bin<-hexbin(Average, Variance, xbins=50)
plot(bin, main="Metformin data set")
PRLR, AVERAGE: categorical value
library(car)
library(survival)
plot(PRLR~AVERAGE, col=unclass(Molecular)+1, pch=16, cex=1.5)
title("Scatter
plot
ERstatus
80%
concentrated
ellipse") #
col=unclass(ERbyGENE)+1
legend(locator(1),
legend=c("ERpos","ERneg"),
lty=c(1,1),
lwd=c(2,2), col=c("green", "red"))

legend(locator(1), legend=c("pvalue = 0.00035","rho=-0.232"))
ellipse(c(mean(AVERAGE),mean(PRLR)),cov(cbind(AVERAGE,
PRLR)),sqrt(qchisq(.8,2)),lty=2,lwd=1,col="blue")
Correlation test ( )
Parametric: Pearson test
Non-parametric: Spearman test
cor.test(PRLR,AVERAGE, method="spearman")
Spearman's rank correlation rho
data:
PRLR and AVERAGE
S = 2598560, p-value = 0.0003548

alternative hypothesis: true rho is not equal to 0
sample estimates:
rho
-0.2326059
cor.test(dose,len)
Pearson's product-moment correlation

data:
dose and len
t = 10.2501, df = 58, p-value = 1.243e-14

alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
0.6892521 0.8777169 95[0.68,0.88]
sample estimates:
cor
0.8026913
11.5
11.0
10.5
10.0
9.5
8.0
8.5
9.0
SET.pos.106.USE
10
12
ER.205225_at.USE
plot(SET.pos.106.USE~ER.205225_at.USE,
col=unclass(ER_Level4)+1,pch=unclass(ER_Level4)+1)
14
12000
Two-way Interaction Plot
10000
Pair
6000
0
2000
4000
mean of IGFBP7
8000
9
8
16
10
17
13
7
15
11
1
4
14
12
5
3
18
6
2
post
pre
pp
interaction.plot(ppERpos, Pair, IGFBP7, col=unclass(ERbyGENEUSE)+1,

lty=c(1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1))
Pair:class
#pp:
pre-post,
Sensitivity Specificity PPV NPV
Disease + Disease - Total

Test + a
a+b
Test - c
c+d
Total a + c
b+d
a+b+c+d
Disease + Disease - Total

Test + a
a+b
Test - c
c+d
Total a + c
b+d
a+b+c+d
epi.tests(a = 670, b = 202, c = 74, d = 640, conf.level = 0.95,

verbose = FALSE)
Disease +
Disease -
Total
Test +
670
202
872
Test -
74
640
714
Total
744
842
1586
Point estimates and 95 % CIs:

--------------------------------------------------------Apparent prevalence:
0.55 (0.53, 0.57)
True prevalence:
0.47 (0.44, 0.49)
Sensitivity:
0.9 (0.88, 0.92)
Specificity:
0.76 (0.73, 0.79)
Diagnostic accuracy:
0.83 (0.81, 0.84)
Diagnostic odds ratio:
28.69 (21.52, 38.24)
Youden's index:
0.66 (0.61, 0.71)
Positive predictive value:
0.77 (0.74, 0.8)
Negative predictive value:
0.9 (0.87, 0.92)
Positive likelihood ratio:
3.75 (3.32, 4.24)
Negative likelihood ratio:
0.13 (0.11, 0.16)
Number needed to diagnose:
1.51 (1.41, 1.64)
---------------------------------------------------------
Others
Color
col=rainbow(10)
,col=red ,
col=c(red,blue,grey80, grey50,black,white)
,cm.colors(20) ,topo.colors(18)
Install
Epi
install.packages("Epi",dep=T)
lty=c(1,2,3) #lty line
Devide
plot(x,
xlab="Months",
ylab="Distant
Event
Free
Survival",
"Low-Intermediate","High-
Intermediate","High"), lty=c(1,2,3,4)) #lty line
Conver to log2
library(Biobase)
data(wang)
attach(wang)
dataDirectory <- system.file("data", package = "datasets")
exprsFile <- file.path(dataDirectory, "wang.txt")
wang<- as.matrix(read.table(exprsFile, header = TRUE, sep = "\t",
row.names = 1, as.is = TRUE))
exprsFile <- "c:/path/to/wang.txt"

annotation <- "hgu133a"
# Experiment description object of class "MIAME".
# The argument names and their values had been scattered by line wrapping;
# they are paired here in the order name, lab, contact, title, abstract, url,
# other.
# NOTE(review): confirm that each value is attached to the intended argument.
experimentData <- new("MIAME", name = "dudule", lab = "lpusztai",
                      contact = "lpus",
                      title = "jbi",
                      abstract = "trial",
                      url = "www.lab.not.exist",
                      other = list(notes = "Created from text files"))
wlog=log2(wang)
write.table(wlog,
file="w_mas5log2",
quote=F,
sep="\t",
col.names=NA)
X
write.table(x$fs,
file="abcd.txt",
sep="\t",
col.names=T,row.names=T, quote=FALSE) #
Conbine
data(md233new)
data(md103new)
data(us)
x=cbind (md233new, md103new)
y=cbind (x, us)
write.table(y,
file="chemo", quote=F, sep="\t", col.names=NA)
subset
subset=(Molecular=="TN")
subset(mda233,Molecular=="ERposHER2neg"|Molecular=="TN")
name of data set, Molecular:name of row
100 Colum
mda233:

To make matrix
x<-matrix(c(1,2,3,4,5,6),nrow=2,ncol=3)
x <- rnorm(100)
# 100
hist(x, xlim=c(-4,4), ylim=c(0,0.5), prob=T, ann=F)#

par(new=T)
plot(density(x), xlim=c(-4,4), ylim=c(0,0.5),

xlab="" , ylab="" , main="" , col="red" )
Size of letters
cex.axis=2,cex.lab=2, cex.main=3, cex.sub=3
2 2
par(mfrow=c(2,2))
plot(sin)
plot(cos)
plot(asin)
plot(acos)
10
Density
0.0
2
10
10
10
density.default(x = TSPY1_217162_at)
10
Density
2
10
par(mfrow=c(3,3))
plot(density(BPY2_208331_at), xlim=c(2,10),xlab= "")
plot(density(CYorf14_207063_at), xlim=c(2,10), xlab= "")
plot(density(CSPG4LYP1_211461_at), xlim=c(2,10), xlab= "")
plot(density(USP9Y_206624_at), xlim=c(2,10), xlab= "")
plot(density(EIF1AY_204410_at), xlim=c(2,10), xlab= "")
plot(density(NLGN4Y_207703_at), xlim=c(2,10), xlab= "")
plot(density(LOC159110_216786_at), xlim=c(2,10), xlab= "")
plot(density(TSPY1_217162_at), xlim=c(2,10), xlab= "")
plot(density(SRY_207893_at), xlim=c(2,10),xlab= "")
Bar plot
10
0.00
0.2
Density
0.0
6
10
density.default(x = SRY_207893_at)
0.4
0.00 0.10 0.20 0.30
0.00
2
density.default(x = LOC159110_216786_at)
0.20
Density
0.20
0.10
Density
2
density.default(x = NLGN4Y_207703_at)
0.00
0.10
0.00
Density
density.default(x = EIF1AY_204410_at)
0.20
density.default(x = USP9Y_206624_at)
0.10
0.20
0.10
0.2
0.4
0.3
0.2
0.1
Density
2
Density
density.default(x = CSPG4LYP1_211461_at)
0.0
Density
density.default(x = CYorf14_207063_at)
0.0 0.1 0.2 0.3 0.4
density.default(x = BPY2_208331_at)
10
3.0
2.5
2.0
1.5
1.0
0.5
0.0
# 2 x 10 matrix filled column by column: each column is one pair of bars.
# The value 0.726 had been split across two lines ("0." / "726"), which is a
# syntax error.
x <- matrix(c(0.246, 3.070, 0.272, 2.440, 0.413, 2.230, 0.776, 2.130,
              0.483, 2.110, 0.000, 2.010, 0.726, 1.980, 0.355, 1.980,
              0.586, 1.970, 0.451, 1.970), 2)
barplot(x, names.arg = c("A", "B", "C", "D", "E", "F", "G", "H", "I", "J"),
        beside = TRUE, col = c("black", "white"))
# Horizontal reference line; the value is numerically -log10(0.05).
abline(h = 1.30102999566398)
0.0
0.1
10
0.2
20
Ratio
0.3
30
0.4
0.5
40
barplot + line chart
Index
# Bar chart drawn against the left axis ...
barplot(X.log, ylim = c(0, 40), axes = FALSE)
abline(h = 1.30102999566398)
axis(2)
# ... overlaid with a line chart of Ratio on its own scale (right axis).
# The original had `axis(4)` interleaved inside the plot() call.
par(new = TRUE)
plot(Ratio, axes = FALSE, type = "l", lty = 1, ylim = c(0, 0.5))
axis(4)
PAM50
## Author: Yuan Qi <yqi1@mdanderson.org>
#### input:
####   mtx: (MAS5-sc600) normalized, log2-transformed gene expression values
# Assign a subtype to every sample (column) of an expression matrix by
# Spearman correlation with a set of centroids.
#
# Arguments:
#   mtx             numeric matrix with probes in rows and samples in columns;
#                   row names must be probe identifiers.
#   fin.parameters  path of an .RData file. The code below reads the objects
#                   `reference.median`, `probes`, `ctd`, `genes` and `genes1`
#                   from it.
#
# Returns a list with
#   subtype    per sample, the name of the centroid column with the highest
#              Spearman correlation
#   probes     the probe chosen for each entry of `probes`
#   genes      `genes1` as loaded
#   origGenes  `genes` as loaded
#   noGenes    setdiff(genes, genes1)
#   corr       samples x centroids matrix of Spearman correlations
#
# Changes from the original: a stray `quote=FALSE)` line (tail of a wrapped,
# commented-out write.table call) made the function unparseable;
# `drop = FALSE` keeps matrices two-dimensional so single-sample input works;
# loops use seq_along()/seq_len() and the result vector is preallocated.
PAM50.v2 <- function(mtx, fin.parameters = "PAM50-parameters.RData") {
  # load() places the objects in this function's environment only.
  load(fin.parameters)

  ## added on 20091113F, for array platforms other than HGU133A.
  if (nrow(mtx) != length(reference.median)) {
    mtx <- mtx[unlist(probes), , drop = FALSE]
    reference.median <- reference.median[unlist(probes)]
  }
  # Subtract the reference median from every probe (row).
  mtx <- mtx - reference.median

  # One representative probe per entry of `probes`: the only probe, or the
  # one with the largest inter-quartile range across samples.
  len <- vapply(probes, length, integer(1))
  p1 <- character(length(len))
  names(p1) <- names(len)
  for (i in seq_along(len)) {
    if (len[[i]] == 1) {
      p1[[i]] <- probes[[i]]
    } else if (len[[i]] > 1) {
      iqr1 <- apply(mtx[probes[[i]], , drop = FALSE], 1, IQR)
      p1[[i]] <- probes[[i]][match(max(iqr1), iqr1)]
    }
  }

  gev <- mtx[p1, , drop = FALSE]
  ctd1 <- ctd[names(p1), , drop = FALSE]

  # Correlate every sample with every centroid and keep the best match.
  outcorr <- matrix(NA_real_, nrow = ncol(gev), ncol = ncol(ctd1))
  type <- character(ncol(gev))
  for (i in seq_len(ncol(gev))) {
    corr <- cor(gev[, i], ctd1, method = "spearman")
    type[[i]] <- colnames(corr)[match(max(corr), corr)]
    outcorr[i, ] <- corr[1, ]
  }
  names(type) <- colnames(gev)
  colnames(outcorr) <- colnames(ctd1)
  rownames(outcorr) <- colnames(gev)

  list(
    subtype = type,
    probes = p1,
    genes = genes1,
    origGenes = genes,
    noGenes = setdiff(genes, genes1),
    corr = outcorr
  )
}
2 (sample size)
library(pwr)
0.8
100 80 ( 0.8)
# NOTE(review): the "medium" range was garbled to "313%" in the extracted
# text; "3-13%" is inferred from the neighbouring ranges -- confirm.
cohen.ES(test="chisq",size="medium") # small: 1-2%, medium: 3-13%, large: 14-26%

Conventional effect size from Cohen (1982)
test = chisq
size = medium
effect.size = 0.3
pwr.chisq.test(w=0.3,df=1,power=0.8)
# Usually power is set to 0.8: if the effect really exists, about 8 out of
# 10 repeated studies of this size would be expected to detect it.
Chi squared power calculation
w = 0.3
N = 87.20955
df = 1
sig.level = 0.05
power = 0.8
NOTE: N is the number of observations
Total Sample size = 2* n( 88*2 )
Bimodal distribution
# Compute the bimodality index of every row of an expression table and
# write the result to a tab-delimited file.
library(mclust)
library(ClassDiscovery)
library(Biobase)

# Tab-delimited input with a header line; the first column holds row names.
exprsFile <- file.path(
  "c:/Program Files/R/R-2.9.0/library/datasets/data",
  "63kinase51cell.txt"
)
exprs <- as.matrix(
  read.table(exprsFile, header = TRUE, sep = "\t", row.names = 1, as.is = TRUE)
)

dim(exprs)
# Round-trip through an ExpressionSet and pull the matrix back out.
exampleSet <- new("ExpressionSet", exprs = exprs)
mat <- exprs(exampleSet)
# Compute once, show the result, then save it.
bi <- bimodalIndex(mat)
bi
write.table(
  bi,
  file = "c:\\BI_63kinase.txt",
  row.names = TRUE,
  col.names = TRUE,
  sep = "\t",
  quote = FALSE,
  dec = "."
)
# Three-dimensional scatter plot of the first three columns of
# "65-555-reg.txt", taken as time, cases and distance in that order.
d1 <- read.table("65-555-reg.txt", header = TRUE)
d2 <- data.frame(time = d1[, 1], cases = d1[, 2], distance = d1[, 3])
# with() evaluates the call inside d2 without attach()/detach(), so nothing
# is left on the search path if the plot call fails.
with(
  d2,
  scatterplot3d(
    cases, distance, time,
    angle = 20, col.axis = "blue", col.grid = "lightblue",
    main = "Three-dimensional scatterplot",
    pch = 21, box = FALSE, cex.symbols = 2
  )
)

R Command2

Uploaded by

Document Information

Original Title

Copyright

Available Formats

Share this document

Share or Embed Document

Sharing Options

Did you find this document useful?

Is this content inappropriate?

Copyright:

Available Formats

R Command2

Uploaded by

Copyright:

Available Formats

library(verification)

t( PC$sd * t( PC$loadings ) )[, drop = FALSE] #

panel.cor <- function(x, y, digits=2, prefix="", cex.cor)

# name of data set is all

M106 1.0000000 0.7818627 0.5588235 0.6348039 0.6004902 0.7426471

M116 0.7426471 0.6936275 0.7083333 0.4607843 0.5490196 1.0000000

M106 0.4460784 0.5906863

Age- continuous value

Fisher Exact test

Number of ROW ncol:

X-squared = 0.17, df = 2, p-value = 0.9185

Pearson's Chi-squared test

X-squared = 51.6556, df = 6, p-value = 2.187e-09

Cochran-Mantel-Haenszel Chi-Squared Test for Count Data v2.11.0

Odds ratio from prospective/X-sectional study

95% CI = 0.72 , 5.81

can Non Total

95% CI = 0.72 5.81

1 d.f. , P value = 0.14

Fisher's exact test (2-sided) P value = 0.168

OR lower lim. upper lim. P value

M-H combined 2.37

M-H Chi2(1) = 2.86 , P value = 0.091

Stratified prospective/X-sectional analysis

recnon: OR = 2.68 (0.75, 11.22)

MH-OR = 2.37 (0.87, 6.42)

x<-matrix(c(70,30,45,5,10,13),2,byrow=T) #2 : Number of ROW (=yoko

mantelhaen.test(x, y = NULL, z = NULL,

x either a 3-dimensional contingency table in array form where each dimension

Kruskal-Wallis (Boxpolt :Kruskal-Wallis :Wilcox)

Kruskal-Wallis chi-squared = 2.3641, df = 2, p-value = 0.3066

Centroid TN according to subgroup definition

ERnegativeHer2negative (by gene)

bb=="ERnegHer2neg"bb ERnegHer2neg categorical

"Others (by gene) "), lty=c(1,1), lwd=c(3,3), col=c("blue", "red"))

expression",xlim=c(0,16), lty=2, main="",ylim=c(0,0.6)) #lwd wide

Log2 converted data

#tk: data set

"HER2pos"), lwd=c(3,3,3), col=c("blue", "red", "green"))

Logistic Regression Model

#p value, stepwise methods

BR3<-cut(PTK6, c(0, 6,15))

#divide to sub group

Logistic Regression Model 2

factor : categorical value continuous

Logistic Regression Model 3

0 *** 0.001 ** 0.01 * 0.05 . 0.1 1

(Dispersion parameter for binomial family taken to be 1)

Residual deviance: 201.28

(Intercept) 0.1586248 17.7689406

low ~ age + lwt + race + smoke + ptl + ht + ui + ftv

low ~ age + lwt + race + smoke + ptl + ht + ui

low ~ lwt + race + smoke + ptl + ht + ui

0 *** 0.001 ** 0.01 * 0.05 . 0.1 1

(Dispersion parameter for binomial family taken to be 1)

Residual deviance: 201.99

#impotant! To obtain odds ratio

print(exp(confint(res2))) #95% coeficient

(Dispersion parameter for binomial family taken to be 1)

Residual deviance: 375.11

relapse ~ HER2byGENE + ERStatus + IKS3Label

relapse ~ HER2byGENE + ERStatus

-4.325 1.53e-05 *** If we get the

0 * 0.001 0.01 * 0.05 . 0.1 1

0 * 0.001 0.01 * 0.05 . 0.1 1

0 * 0.001 0.01 * 0.05 . 0.1 1

0 `' 0.001 `' 0.01 `' 0.05 `.' 0.1 ` ' 1