Professional Documents
Culture Documents
library(Hmisc)
library(Design)
library(rpart)
library(survival)
library(Epi)
require(MASS)
library(car)
library(graphics)
library(wle)
library(epiR)
library(stats)
library(epicalc)
library(hexbin)
library(ISwR)
library(sma)
library(geneplotter)
library(beeswarm)
library(gstat)
library ("fBasics")
library ("Rcmdr")
library(tcltk)
library(scatterplot3d)
library(amap)
library(pwr)
library(Biobase)
library(mclust)
library(ClassDiscovery)
library(scatterplot3d)
data(name of data base)
attach(name of data base)
detach(name of data base)
install.packages("verification",dep=T)
install.packages("Hmisc",dep=T)
install.packages("Design",dep=T)
install.packages("rpart",dep=T)
install.packages("survival",dep=T)
install.packages("amap",dep=T)
install.packages("Epi",dep=T)
install.packages("MASS",dep=T)
install.packages("car",dep=T)
install.packages("wle",dep=T)
install.packages("epiR",dep=T)
install.packages("stat",dep=T)
install.packages("epicalc",dep=T)
install.packages("hexbin",dep=T)
install.packages("ISwR",dep=T)
install.packages("sma",dep=T)
install.packages("lattice",dep=T)
install.packages("beeswarm",dep=T)
install.packages("gstat",dep=T)
install.packages("Rcmdr",dep=T)
install.packages("pwr",dep=T)
install.packages("mclust",dep=T)
install.packages("ClassDiscovery",dep=T)
install.packages("Biobase",dep=T)
install.packages("scatterplot3d",dep=T)
source("http://bioinformatics.mdanderson.org/OOMPA/oompaLite.R")
oompaLite()
oompainstall(groupName="all")
source("http://www.stat.washington.edu/mclust/license.txt")
A:Principal
Components Analysis
PC <- princomp(~SET_pos_106_USE+X205225_at, cor=TRUE, data=IHC_465)
PC
Call:
princomp(formula = ~SET_pos_106_USE + X205225_at, data = IHC_465,
cor = TRUE)
Standard deviations:
Comp.1
Comp.2
1.3788400 0.3143252
2
variables and
465 observations.
unclass(loadings(PC)) #
Comp.1
Comp.2
SET_pos_106_USE -0.7071068
0.7071068
X205225_at
-0.7071068 -0.7071068
PC$sd^2 #
Comp.1
Comp.2
1.90119966 0.09880034
Comp.2
SET_pos_106_USE -0.974987
X205225_at
0.2222615
-0.974987 -0.2222615
unclass(loadings(PC))[,c(1, 2)] #
Comp.1
Comp.2
SET_pos_106_USE -0.7071068
X205225_at
0.7071068
-0.7071068 -0.7071068
0.7
0.5
0.6
lumAScore
0.8
0.9
Scatter plot
10
20
30
40
50
ONCOTYPE_DX_RS_score
Variance
10
scatterplot(lumAScore~ONCOTYPE_DX_RS_score)
10
Average
15
# Smoothed density scatter plot of Variance against Average: first with the
# default text sizes, then with axis, label and subtitle text doubled.
smoothScatter(Average, Variance, ylim = c(0, 10), xlim = c(0, 15))
smoothScatter(Average, Variance, ylim = c(0, 10), xlim = c(0, 15),
              cex.axis = 2, cex.lab = 2, cex.sub = 2)
10
0.63
0.93
0.82
FOXM1_202580_x_at
0.59
0.91
0.85
MKI67_212021_s_at
0.77
0.71
TOP2A_FOXM1_NKI672
0.91
7 8 9
0.74
9.0
9.5
10
7.5
8.5
9.5
TOP2A_201292_at
11
13
8.5
GGIaverage
10
11
12
13
7.5
8.0
8.5
9.0
9.5 10.0
8.5
9.0
9.5
upper.panel=panel.cor,
data=
X203440_at
X202274_at
X201426_s_at
X201131_s_at
X201015_s_at
Mainz_ERpos_grade2)
X201015_s_at
X201131_s_at
X201426_s_at
X202274_at
X203440_at
data(all)
attach(all)
plot.cor(x= cor(all, method = "spearman" ),new = FALSE,labels =
names( all ),zlim
M106
M107
= c( -1.0, 1.0 ) )
M108
M111
M113
M116
M117
M121
Courier New
Non-parametric
Parametric
Wilcoxon
T-test
Kruskal wallis
ANOVA
Fisher
X2
Spearman
Pearson
Wilcoxon
t.test
x<-matrix(c(70,30,
45,5,
6,3),nrow=3,ncol=2,byrow=T)
#nrow :
2*3
42, 7,
36, 3,
x<-matrix(c(42,7,1,36,3,0),nrow=2,ncol=3,byrow=T)
fisher.test(x)
X2
70 30
45 2 X2 Fisher
Fisher
x<-matrix(c(70,30,45,5),2,byrow=T)
fisher.test(x)
Fisher's Exact Test for Count Data
p-value = 0.007264
Pearson's Chi-squared test
x<-matrix(c(70,30,45,5),2,byrow=T)
chisq.test(x)
Pearson's Chi-squared test with Yates' continuity correction
X-squared = 6.3773, df = 1, p-value = 0.01156
x<-matrix(c(7,6,10,14,9,18),2,byrow=T)
chisq.test(x)
Pearson's Chi-squared test
data:
3X3
# 3 x 4 contingency table, filled row by row: one row per marital status
# (see rownames below) and one column per category (see colnames below).
x <- matrix(
  c(652, 1537, 598, 242,
    36, 46, 38, 21,
    218, 327, 106, 67),
  nrow = 3, byrow = TRUE
)
colnames(x) <- c("0", "1-150", "151-300", ">300")
rownames(x) <- c("Married", "Prev.married", "Single")
x
0 1-150 151-300 >300
Married
1537
598
242
36
46
38
21
218
327
106
67
Prev.married
652
Single
chisq.test(x)
Odds of outcome
OR = 2
1/2
1/4
non-exposed
exposed
Exposure category
cancer
snp
neg
24
25
49
pos
10
21
31
Total
34
46
80
OR =
rec
2.68
0.749
11.22
0.105
rec rec
1.56
0.166
16.43
1.000
0.873
6.42
0.091
Odds of outcome
1/2
1/4
Non-exposed
Exposed
Outcome= cancer , Exposure= snp
is at least 2 and the last dimension corresponds to the strata, or a factor object
with at least 2 levels. y a factor object with at least 2 levels; ignored if x is an
array. z a factor object with at least 2 levels identifying to which stratum the
corresponding elements in x and y belong; ignored if x is an array. alternative
indicates the alternative hypothesis and must be one of "two.sided", "greater"
or "less". You can specify just the initial letter. Only used in the 2 by 2 by K
case. correct a logical indicating whether to apply continuity correction when
computing the test statistic. Only used in the 2 by 2 by K case. exact a logical
indicating whether the Mantel-Haenszel test or the exact conditional test (given
the strata margins) should be computed. Only used in the 2 by 2 by K case.
conf.level confidence level for the returned confidence interval. Only used in
the 2 by 2 by K case.
Anova
Age- continuous value
group-categorical (0,1,2,,,)
data(hako)
anova(lm(Age~group,data=hako) )
group
308.2
154.1
1.1583 0.3303 (P )
Age by group
3e-05
2e-05
Density
4e-05
5e-05
0e+00
1e-05
-1e+05
-5e+04
0e+00
5e+04
Centroid TN
Density
Data frame: mda
bb, ERnegHer2neg: categorical value
dd, continuous value
Centroid_TN.TN<-mda$dd[mda$bb=="ERnegHer2neg"]
legend=c("ERnegativeHer2negative
(by
gene)
",
converted
gene
0.3
0.0
0.1
0.2
Density
0.4
0.5
0.6
density.default(x = AKT1)
10
N = 286 Bandwidth = 0.1855
# Kernel density estimate of AKT1 with two reference lines added.
plot(density(AKT1))
abline(v=10) # vertical line at x = 10
abline(h=1.3) # horizontal line at y = 1.3
11
12
Histogram of Ozone
10
20
Frequency
20
10
Frequency
30
30
Histogram of Wind
10
15
20
Wind
50
100 150
Ozone
:
Data frame*: airquality
Wind: continuous value
Ozone: continuous value
attach(airquality)
shapiro.test(Wind)
W = 0.9858, p-value = 0.1178
shapiro.test(Ozone)
W = 0.8787, p-value = 2.790e-08
Wind Ozone
Histogram
layout(t(1:2))
hist(Wind)
main="Kinome,,,,dataset",xlab="Kinome
100, 600)
hist(Ozone)
10
15
20
Wind
3
# Three plotting regions side by side in one row. The previous `1:2:3`
# parsed as `(1:2):3`, which only yields 1:3 by discarding an element
# with a warning.
layout(t(1:3))
hist(Wind)
hist(Wind)
hist(Wind)
20
10
0
0
10
Frequency
20
Frequency
30
20
0
10
Frequency
Histogram of Ozone
30
Histogram of Ozone
30
Histogram of Wind
50
100
Ozone
150
50
100
Ozone
150
TN",xlim=c(-
10
8
2
12
14
Stripchart
ERposHER2negIBC_nonrec
HER2posIBC_nonrec
HER2posIBC_rec
TNIBC_nonrec
TNIBC_rec
# Strip chart of TIG1 per IBC_rec_vs_nonrec group, with the group mean and
# mean +/- one standard deviation drawn next to each strip.
data(IBC)
attach(IBC)
mHT <- tapply(TIG1, IBC_rec_vs_nonrec, mean) # per-group mean
sHT <- tapply(TIG1, IBC_rec_vs_nonrec, sd)   # per-group standard deviation
# x positions of the summary marks: one per group, shifted 0.15 to the
# right of the strip so they do not sit on top of the jittered points.
IS <- seq_along(mHT) + 0.15
stripchart(TIG1 ~ IBC_rec_vs_nonrec, method = "jitter", vertical = TRUE,
           ylab = "Log2 converted data")
points(IS, mHT, pch = 1) # plot the group means
# code = 3 with angle = 90 draws a flat cap at both ends, i.e. an error bar
# running from mean - SD to mean + SD.
arrows(IS, mHT - sHT, IS, mHT + sHT, code = 3, angle = 90, length = .1)
10000
9500
9000
PUM1_201166_s_atROW
8500
8000
1Control_SUM149
2HDAC_SUM149
3Control_SUM190
4HDAC_SUM190
stripchart(PUM1_201166_s_atROW~group,method="jitter",vert=T,pch=
15, cex = 1.5)
Order rank
Data frame attach weht attach(weht)
library(survival)
# Sort the samples by increasing IKS so that the plot index is the rank.
weht <- tk[order(tk$IKS), ]
attach(weht)
# One point per sample: colour is the integer code of Molecular plus one,
# symbol 16 is a filled circle.
plot(IKS, col = unclass(Molecular) + 1, pch = 16,
     xlab = "Rank ordered by gene expression", ylab = "Immune Kinome Score")
title("TRANSBIG data sets")
legend(locator(1),
legend=c("ERnegHER2neg",
"ERposHER2neg",
#odds ratio
exp(confint(x))
# 95% confidence interval
# Logistic regression of relapse on HER2byGENE, ERStatus and IKS3Label.
x <- glm(relapse ~ HER2byGENE + ERStatus + IKS3Label, family = binomial,
         data = wk)
summary(x)
# Stepwise model selection; x2 holds the model that step() settles on.
summary(x2 <- step(x))
exp(coef(x2))    # odds ratios
exp(confint(x2)) # 95% confidence intervals of the odds ratios
#divide to 2 group
x<-glm(pCR~BR3,family=binomial,data=md233,
subset=(Molecular=="ERposHER2neg"))
summary(x2<-step(x))
value P-value
x<-lrm(relapse~ERstatusBYgene+MKS12,x=TRUE, y=TRUE,data=wk)
x
p < 0.05
fastbw(x) #
race
low white black other
0
73
15
42
23
11
25
smoke<-(smoke>0);print(table(low,smoke))
smoke
low FALSE TRUE
0
86
44
29
30
bw<-data.frame(low,age,lwt,race,smoke,ptl,ht,ui,ftv)
#selection of
event+factor
detach(birthwt)
print(summary(res<-glm(low~.,family=binomial,data=bw)))
#low~age+lwt+,,
Call:
glm(formula = low ~ ., family = binomial, data = bw)
Deviance Residuals:
Min
1Q
Median
3Q
Max
-1.8946
-0.8212
-0.5316
0.9818
2.2125
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept)
0.480623
1.196888
0.402
0.68801
age
-0.029549
0.037031
-0.798
0.42489
lwt
-0.015424
0.006919
-2.229
0.02580 *
1.272260
0.527357
2.413
0.01584 *
raceblack
raceother
0.880496
0.440778
1.998
0.04576 *
smokeTRUE
0.938846
0.402147
2.335
0.01957 *
ptl
0.543337
0.345403
1.573
0.11571
ht
1.863303
0.697533
2.671
0.00756 **
ui
0.767648
0.459318
1.671
0.09467 .
ftv
0.065302
0.172394
0.379
0.70484
--Signif. codes:
on 188
degrees of freedom
on 179
degrees of freedom
AIC: 221.28
Number of Fisher Scoring iterations: 4
# Nagelkerke's pseudo R-squared for a fitted logistic regression.
#
# rr: fitted model exposing `deviance` and `null.deviance` (e.g. a glm fit).
# n:  number of observations used in the fit.
#
# Returns the Cox & Snell R-squared, 1 - exp((deviance - null.deviance) / n),
# divided by its attainable maximum, 1 - exp(-null.deviance / n).
# NOTE(review): this identity assumes deviance == -2 * logLik, which holds
# for 0/1 responses; confirm before using it with grouped binomial data.
NagelkerkeR2 <- function(rr, n) {
  cox_snell <- 1 - exp((rr$deviance - rr$null.deviance) / n)
  cox_snell / (1 - exp(-rr$null.deviance / n))
}
print(NagelkerkeR2(res,nrow(bw)))
[1] -1.060980e+87
print(exp(coef(res)))
(Intercept)
age
smokeTRUE
ptl
1.6170819
2.5570281
lwt
ht
0.9708833
1.7217428
print(exp(confint(res)))
Waiting for profiling to be done...
2.5 %
ui
0.9846941
6.4449886
97.5 %
0.9014649
1.0429731
lwt
0.9706547
0.9975382
raceblack
1.2733620 10.2378101
raceother
1.0269690
5.8422688
smokeTRUE
1.1753715
5.7425658
ptl
0.8838560
3.4765158
ht
1.7030020 27.6935195
raceblack
ftv
3.5689085
2.1546928
raceother
1.0674812
2.4120956
ui
0.8662663
5.3169672
ftv
0.7534567
1.4900589
# Stepwise model selection starting from the full model `res`; the selected
# model is kept in `res2` and its summary is printed.
print(summary(res2<-step(res))) #important!
Start:
AIC=221.28
AIC
- ftv
201.43 219.43
- age
201.93 219.93
<none>
201.28 221.28
- ptl
203.83 221.83
- ui
204.03 222.03
- race
208.75 224.75
- lwt
206.80 224.80
- smoke
206.91 224.91
- ht
208.81 226.81
Step:
AIC=219.43
<none>
AIC
201.99 217.99
201.43 219.43
- ptl
203.95 219.95
- ui
204.11 220.11
- race
208.77 222.77
- lwt
206.81 222.81
- smoke
206.92 222.92
- ht
208.81 224.81
Step:
AIC=217.99
AIC
201.99 217.99
1
204.22 218.22
- ui
204.90 218.90
- smoke
207.73 221.73
- lwt
208.11 222.11
- race
210.31 222.31
- ht
209.46 223.46
Call:
glm(formula = low ~ lwt + race + smoke + ptl + ht + ui, family =
binomial,
data = bw)
Deviance Residuals:
Min
1Q
Median
3Q
Max
-1.9049
-0.8124
-0.5241
0.9483
2.1812
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -0.086550
0.951760
-0.091
0.92754
lwt
-0.015905
0.006855
-2.320
0.02033 *
raceblack
1.325719
0.522243
2.539
0.01113 *
raceother
0.897078
0.433881
2.068
0.03868 *
smokeTRUE
0.938727
0.398717
2.354
0.01855 *
ptl
0.503215
0.341231
1.475
0.14029
ht
1.855042
0.695118
2.669
0.00762 **
ui
0.785698
0.456441
1.721
0.08519 .
--Signif. codes:
on 188
degrees of freedom
on 181
degrees of freedom
AIC: 217.99
Number of Fisher Scoring iterations: 4
print(NagelkerkeR2(res2,nrow(bw))) #
[1] -2.138245e+87
print(exp(coef(res2)))
(Intercept)
lwt
ptl
ht
0.9170901
1.6540303
raceblack
raceother
smokeTRUE
2.4524265
2.5567241
ui
0.9842205
6.3919640
3.7648926
2.1939368
97.5 %
(Intercept) 0.1483796
6.2982590
lwt
0.9702639
0.9968395
raceblack
1.3555550 10.6805086
raceother
1.0617504
5.8773934
smokeTRUE
1.1846704
5.7092626
ptl
0.8552098
3.3045306
ht
1.6936142 27.2645876
ui
0.8879894
5.3881165
>
95 P
3.765
1.355
10.68
0.011
> library(survival)
Loading required package: splines
> library(MASS)
> data(wk)
> attach(wk)
>
x<-
glm(relapse~HER2byGENE+ERStatus+IKS3Label,family=binomial,data=wk)
> summary(x)
Call:
glm(formula = relapse ~ HER2byGENE + ERStatus + IKS3Label, family =
binomial,
data = wk)
Deviance Residuals:
Min
1Q
Median
3Q
Max
-1.0339
-0.9881
-0.8733
1.3427
1.5344
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept)
-0.347302
0.352044
-0.987
0.324
HER2byGENEHER2pos
-0.041258
0.327125
-0.126
0.900
ERStatusER+
-0.009563
0.298193
-0.032
0.974
IKS3Label2Intermediate -0.106124
0.302806
-0.350
0.726
IKS3Label3High
0.323853
-1.268
0.205
-0.410629
on 285
degrees of freedom
on 281
degrees of freedom
AIC: 385.11
Number of Fisher Scoring iterations: 4
> summary(x2<-step(x))
Start:
AIC=385.11
AIC
- IKS3Label
376.87 382.87
- ERStatus
375.12 383.12
- HER2byGENE
375.13 383.13
<none>
Step:
375.11 385.11
AIC=382.87
AIC
- HER2byGENE
376.93 380.93
- ERStatus
377.03 381.03
<none>
Step:
376.87 382.87
AIC=380.93
relapse ~ ERStatus
Df Deviance
- ERStatus
AIC
377.11 379.11
<none>
Step:
376.93 380.93
AIC=379.11
relapse ~ 1
Call:
glm(formula = relapse ~ 1, family = binomial, data = wk)
Deviance Residuals:
Min
1Q
Median
3Q
Max
-0.9623
-0.9623
-0.9623
1.4089
1.4089
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept)
-0.5295
0.1224
on 285
degrees of freedom
on 285
degrees of freedom
AIC: 379.11
Number of Fisher Scoring iterations: 4
> exp(coef(x2))
(Intercept)
0.5888889
> exp(confint(x2))
Waiting for profiling to be done...
2.5 %
97.5 %
0.4619652 0.7469545
>
R glm
R
glm(family=binomial)
y ~ x1+x2+x4
yx1x2x3
lr.data lr.data
x1, x2
Coefficients:
(Intercept)
x1
x2
-5.645581
0.008297
0.011386
95 Residual
76.71
AIC: 78.18
> summary(result)
Call:
glm(formula = y ~ x1 + x2, family = binomial, data = data)
Deviance Residuals:
Min
1Q
Median
3Q
Max
-1.4350
-0.5413
-0.4625
-0.3801
2.2197
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -5.645581
3.048239
-1.852
0.0640 .
x1
0.008297
0.021208
0.391
0.6956
x2
0.011386
0.005740
1.984
0.0473 *
--Signif. codes:
on 97
degrees of freedom
on 95
degrees of freedom
AIC: 78.184
Number of Fisher Scoring iterations: 5
> coefficients(result)
(Intercept)
x1
x2
-5.645580693
0.008297108
0.011386484
> residuals(result)
1
2.0972899
1.5625411
2.1967232
2.1767753
1.8665989
2.0548909
10
11
12
13
1.9670456
1.9349012
2.2197067
18
19
20
7
-0.4170057
8
14
1.7065171
15
21
16
17
1.9215695
-0.3640321
97
76.714
95
72.184
4.530
0.104
0.8
1.0
ROC-LDLT
0.6
0.4
Sens: 67.3%
Spec: 70.7%
PV+: 37.9%
PV-: 89.0%
Variable
est. (s.e.)
(Intercept) -2.992 (0.403)
test 0.000 (0.000)
0.2
Sensitivity
x = 6778.926
0.0
Model: y ~ x
Area under the curve: 0.733
0.0
0.2
0.4
0.6
0.8
1.0
1-Specificity
ROC ROC
Event 10
Data frame: mdk2
MKS: continuous value
data(mdk2)
x<-(MKS)
y<-c(rep(1,49),rep(0,184))
ROC(x,y,plot="ROC",main="ROC")
English version
# Area under the ROC curve for GGI_128 as a predictor of relapse.
roc.area(JBI_259$relapse, JBI_259$GGI_128)
# rcorr.cens() returns, in order, the C index, Somers' Dxy and the standard
# deviation of Dxy.
rc <- rcorr.cens(JBI_259$GGI_128, JBI_259$relapse)
rc
# AUC = 0.5 + Dxy / 2, so the 95% limits are 0.5 + 0.5 * (Dxy -/+ 1.96 * SD).
# The SD must not be halved again inside the bracket: the outer 0.5 already
# converts the Dxy scale to the AUC scale.
aucbas <- 0.5 + (.5 * (rc[2] - (1.96 * rc[3])))  # lower 95% limit
auchaut <- 0.5 + (.5 * (rc[2] + (1.96 * rc[3]))) # upper 95% limit
aucbas
auchaut
# Example: 95% CI 0.6-0.9
15000
10000
0
5000
ROW data
20000
25000
Cell19
Cell23
Cell51
JBI
Transbig
Wang
HER2pos
and
molecular
subgroup\n
JBI
dataset",
ylab="Centroid HER2pos")
boxplot(bb~aa,data=ea,main="Kinoma score ERpos/Her2neg-A",ylab="ROW
data",
ylim=c(0,25000),names=c("Cell19","Cell23","Cell51","JBI","Transbig"
,"Wang"), col=rainbow(10))
legend(locator(1), legend=c("Kruskal-Wallis p=0.0579"))
14
12
10
8
6
4
10
11
12
# One box per sample vector: SUM149 and SUM190, at 0uM and 1uM, three
# replicates each.
boxplot(SUM149.0uM.1, SUM149.0uM.2, SUM149.0uM.3,
        SUM149.1uM.1, SUM149.1uM.2, SUM149.1uM.3,
        SUM190.0uM.1, SUM190.0uM.2, SUM190.0uM.3,
        SUM190.1uM.1, SUM190.1uM.2, SUM190.1uM.3)
Univariate analysis
library(survival)
# Cox proportional hazards model of relapse-free survival on IKS3Label,
# fitted on the rows of wk where Molecular is "TN"; ties are handled with
# the Breslow method.
x <- coxph(Surv(X120_RFSmonths, relapse) ~ IKS3Label, method = "breslow",
           data = wk, subset = (Molecular == "TN"))
summary(x)
Multivariate analysis
library(survival)
# Cox proportional hazards model of relapse-free survival on MKS100 and
# IKS50 together, fitted on the rows of tk where ERbyGENE is "ERpos"; ties
# are handled with the Breslow method.
x <- coxph(Surv(X120_RFSmonths, relapse) ~ MKS100 + IKS50, method = "breslow",
           data = tk, subset = (ERbyGENE == "ERpos"))
summary(x)
Call:
coxph(formula = Surv(time, event) ~ Pt, data = KM3)
n= 136
coef exp(coef) se(coef)
Pt 1.08
2.94
2.94
Rsquare= 0.048
0.34
1.32
6.56
on 1 df,
= 6.95
on 1 df,
p=0.00838
on 1 df,
p=0.0057
x<-coxph(Surv(X120_RFSmonths,
relapse)~BR3,method="breslow",data=WGG,
subset=(Molecular=="ERposHER2neg"))
summary(x)
Continuous value categorical value
x<-c(MKS/100) #100 Colum
#p value
#odds ratio
# 95% confidence interval
library(survival)
x<-survfit(Surv(X120_RFSmonths, relapse)~ IKS4Label, conf.int=.95,
subset = (Molecular == "ERposHER2neg"))
plot(x,
xlab="Months",
legend.text=c("Low",
ylab="Distant
Event
Free
Survival",
"Low-Intermediate","High-
0.6
0.4
0.2
0.8
1.0
Low
Low-Intermediate
0.0
High-Intermediate
High
0
50
100
150
Months
KM
x<-survfit(Surv(RFSmonths,
relapse)~
IKS4Label,
conf.int=.95,
xlab="Months",
ylab="Distant
legend.text=c("Low",
Intermediate","High"),
Event
Free
Survival",
"Low-Intermediate","Highlty=c(2,4,6,8),
col=c("red",
"green",
"blue","orange"))
title("DEFS according to IKS sub group ERpos/Her2neg Wang dataset")
legend(locator(1), legend=c("Logrank test p=0.0579"))
0.6
0.4
0.0
0.2
0.8
1.0
No.
28
23
18
14
13
11
11
10
1Low
28
26
24
22
22
21
21
20
16
10
2High
12
24
36
48
60
72
84
96
108
120
Months
x<-survfit(Surv(X120_RFSmonths,
relapse)~
TIG1TNLabel2,
n.risk=TRUE,
col=c("red",
conf.type=c("none"),
"blue"),
lwd=c(2,2),
se.fit=FALSE,
xlab="Months",
1.0
0.8
0.6
0.4
0.0
0.2
Survival Probability
360
720
1080
1440
1800
2160
2520
Days
library(verification)
library(Hmisc)
library(Design)
library(survival)
data(KM3)
attach(KM3)
d <- datadist(KM3)
options(datadist="d")
srv<-Surv(time,event)
fit<-survfit(srv~Pt, data=KM3)
fit
Call: survfit(formula = srv ~ Pt, data = KM3)
n events median 0.95LCL 0.95UCL
0 97
12
Inf
2879
Inf
1 39
12
Inf
1983
Inf
2880
3240
3600
survplot(fit,type="kaplanmeier",pr=TRUE,conf.int=TRUE,xlim=c(0,3600),time.inc=360)
legend(locator(1),
legend=c("Logrank
test
summary(fit)
Call: survfit(formula = srv ~ GGIaverageLabel, data = JBI_grade2)
4 observations deleted due to missingness
1Low
time n.risk n.event survival std.err lower 95% CI upper 95% CI
13.0
64
0.969
0.0217
0.881
0.992
18.0
62
0.953
0.0264
0.862
0.985
27.5
60
0.937
0.0304
0.841
0.976
30.5
57
0.921
0.0340
0.820
0.966
40.7
55
0.904
0.0373
0.799
0.956
45.3
53
0.887
0.0403
0.777
0.945
45.4
52
0.870
0.0430
0.756
0.933
53.0
49
0.852
0.0456
0.735
0.920
84.0
36
0.829
0.0501
0.702
0.905
98.8
27
0.798
0.0569
0.657
0.885
2High
time n.risk n.event survival std.err lower 95% CI upper 95% CI
11.0
61
0.984
0.0163
0.889
0.998
14.1
60
0.967
0.0228
0.875
0.992
14.6
59
0.951
0.0277
0.855
0.984
16.2
58
0.934
0.0317
0.835
0.975
23.6
56
0.918
0.0352
0.814
0.965
25.2
54
0.901
0.0385
0.792
0.954
29.6
53
0.884
0.0413
0.771
0.943
37.2
50
0.866
0.0441
0.750
0.931
37.3
49
0.848
0.0466
0.729
0.918
41.0
48
0.831
0.0489
0.708
0.905
42.5
47
0.813
0.0509
0.688
0.892
43.6
46
0.795
0.0528
0.667
0.878
44.6
45
0.778
0.0545
0.648
0.865
53.0
40
0.758
0.0565
0.626
0.849
78.0
24
0.695
0.0672
0.542
0.806
87.0
20
0.660
0.0723
0.499
0.781
Scatter plot
pairs(~mpg+disp+drat+wt,data=mtcars,
main="Simple Scatterplot Matrix")
Counts
1602
1502
1402
1302
1202
1102
1002
902
802
701
601
501
401
301
201
101
1
Variance
15
10
0
2
10
12
14
Average
library(hexbin)
bin<-hexbin(Average, Variance, xbins=50)
plot(bin, main="Metformin data set")
library(car)
library(survival)
plot(PRLR~AVERAGE, col=unclass(Molecular)+1, pch=16, cex=1.5)
title("Scatter
plot
ERstatus
80%
concentrated
ellipse") #
col=unclass(ERbyGENE)+1
legend(locator(1),
legend=c("ERpos","ERneg"),
lty=c(1,1),
-0.2326059
cor.test(dose,len)
11.5
11.0
10.5
10.0
9.5
8.0
8.5
9.0
SET.pos.106.USE
10
12
ER.205225_at.USE
plot(SET.pos.106.USE~ER.205225_at.USE,
col=unclass(ER_Level4)+1,pch=unclass(ER_Level4)+1)
14
12000
10000
Pair
6000
0
2000
4000
mean of IGFBP7
8000
9
8
16
10
17
13
7
15
11
1
4
14
12
5
3
18
6
2
post
pre
pp
Pair:class
#pp:
pre-post,
a+b
Test - c
c+d
Total a + c
b+d
a+b+c+d
a+b
Test - c
c+d
Total a + c
b+d
a+b+c+d
Disease -
Total
Test +
670
202
872
Test -
74
640
714
Total
744
842
1586
True prevalence:
Sensitivity:
Specificity:
Diagnostic accuracy:
Youden's index:
---------------------------------------------------------
Others
Color
col=rainbow(10)
,col=red ,
col=c(red,blue,grey80, grey50,black,white)
,cm.colors(20) ,topo.colors(18)
Install
Epi
install.packages("Epi",dep=T)
lty=c(1,2,3) #lty line
Divide
x<-c(MKS/100) #100 Colum
plot(x,
xlab="Months",
ylab="Distant
legend.text=c("Low",
Event
Free
Survival",
"Low-Intermediate","High-
Convert to log2
library(Biobase)
data(wang)
attach(wang)
dataDirectory <- system.file("data", package = "datasets")
exprsFile <- file.path(dataDirectory, "wang.txt")
wang<- as.matrix(read.table(exprsFile, header = TRUE, sep = "\t",
row.names = 1, as.is = TRUE))
"lpus",
"www.lab.not.exist",
title
other
"jbi",
=
abstract
list(notes
"trial",
"Created
url
from
text
files"))
wlog=log2(wang)
write.table(wlog,
file="w_mas5log2",
quote=F,
sep="\t",
col.names=NA)
X
write.table(x$fs,
file="abcd.txt",
sep="\t",
col.names=T,row.names=T, quote=FALSE) #
Combine
data(md233new)
data(md103new)
data(us)
x=cbind (md233new, md103new)
y=cbind (x, us)
write.table(y,
subset
subset=(Molecular=="TN")
subset(mda233,Molecular=="ERposHER2neg"|Molecular=="TN")
name of data set, Molecular:name of row
100 Colum
mda233:
x<-matrix(c(1,2,3,4,5,6),nrow=2,ncol=3)
x <- rnorm(100)
# 100
2 2
par(mfrow=c(2,2))
plot(sin)
plot(cos)
plot(asin)
plot(acos)
10
Density
0.0
2
10
10
10
density.default(x = TSPY1_217162_at)
10
Density
2
10
par(mfrow=c(3,3))
plot(density(BPY2_208331_at), xlim=c(2,10),xlab= "")
plot(density(CYorf14_207063_at), xlim=c(2,10), xlab= "")
plot(density(CSPG4LYP1_211461_at), xlim=c(2,10), xlab= "")
plot(density(USP9Y_206624_at), xlim=c(2,10), xlab= "")
plot(density(EIF1AY_204410_at), xlim=c(2,10), xlab= "")
plot(density(NLGN4Y_207703_at), xlim=c(2,10), xlab= "")
plot(density(LOC159110_216786_at), xlim=c(2,10), xlab= "")
plot(density(TSPY1_217162_at), xlim=c(2,10), xlab= "")
plot(density(SRY_207893_at), xlim=c(2,10),xlab= "")
Bar plot
10
0.00
0.2
Density
0.0
6
10
density.default(x = SRY_207893_at)
0.4
0.00
2
density.default(x = LOC159110_216786_at)
0.20
Density
0.20
0.10
Density
2
density.default(x = NLGN4Y_207703_at)
0.00
0.10
0.00
Density
density.default(x = EIF1AY_204410_at)
0.20
density.default(x = USP9Y_206624_at)
0.10
0.20
0.10
0.2
0.4
0.3
0.2
0.1
Density
2
Density
density.default(x = CSPG4LYP1_211461_at)
0.0
Density
density.default(x = CYorf14_207063_at)
density.default(x = BPY2_208331_at)
10
3.0
2.5
2.0
1.5
1.0
0.5
0.0
# Two values per item A..J, stored column-wise: row 1 and row 2 of each
# column become the black and white bar of that item.
x <- matrix(
  c(0.246, 3.070, 0.272, 2.440, 0.413, 2.230, 0.776, 2.130, 0.483, 2.110,
    0.000, 2.010, 0.726, 1.980, 0.355, 1.980, 0.586, 1.970, 0.451, 1.970),
  nrow = 2
)
barplot(x, names.arg = c("A", "B", "C", "D", "E", "F", "G", "H", "I", "J"),
        beside = TRUE, col = c("black", "white"))
abline(h = 1.30102999566398) # reference line at -log10(0.05)
0.0
0.1
10
0.2
20
Ratio
0.3
30
0.4
0.5
40
Index
# Bar chart of X.log with a line plot of Ratio overlaid on a second y axis.
barplot(X.log, ylim = c(0, 40), axes = FALSE)
abline(h = 1.30102999566398) # reference line at -log10(0.05)
axis(2)                      # left axis, scaled for the bars
par(new = TRUE)              # draw the next plot over the bar chart
plot(Ratio, axes = FALSE, type = "l", lty = 1, ylim = c(0, 0.5))
axis(4)                      # right axis, scaled for the Ratio line
PAM50
## Author: Yuan Qi <yqi1@mdanderson.org>
####
input:
mtx:
(MAS5-sc600)normalized,
log2-transformed
gene
expression values
PAM50.v2 <- function(mtx, fin.parameters="PAM50-parameters.RData" )
{
load(fin.parameters)
## added on 20091113F, for array platforms other than HGU133A.
if( nrow( mtx) != length(reference.median) ){
mtx = mtx[unlist(probes), ]
reference.median = reference.median[ unlist(probes) ]
}
mtx <- mtx - reference.median
len = sapply(probes, length)
p1 = character(length(len))
names(p1) = names(len)
for( i in 1:length(len)){
if( len[[i]] == 1){
p1[[i]] = probes[[i]]
}else if(len[[i]] > 1){
iqr1 = apply(mtx[probes[[i]],], 1, IQR)
p1[[i]] <- probes[[i]][ match(max(iqr1), iqr1) ]
}
}
gev <- mtx[p1,]
ctd1 <- ctd[names(p1),]
#type <- apply( gev, 2, function(x){
#corr <- cor(x, ctd1, method="spearman")
#colnames(corr)[match(max(corr), corr)]
#}
#)
list(subtype=type,
probes=p1,
genes=genes1,
2 (sample size)
library(pwr)
0.8
100 80 ( 0.8)
size = medium
effect.size = 0.3
pwr.chisq.test(w=0.3,df=1,power=0.8)
# Power is usually set to 0.8: when a true effect exists, about 8 out of
# 10 such tests will detect it.
Chi squared power calculation
w = 0.3
N = 87.20955
df = 1
sig.level = 0.05
power = 0.8
NOTE: N is the number of observations
Bimodal distribution
library(mclust)
library(ClassDiscovery)
library(Biobase)
exprsFile
<-
"c:/Program
Files/R/R-
2.9.0/library/datasets/data/63kinase51cell.txt"
exprs
<-
as.matrix(read.table(exprsFile,
header
TRUE,sep
"\t",
# Three-dimensional scatter plot built from the first three columns of the
# input file, relabelled as time, cases and distance.
d1 <- read.table("65-555-reg.txt", header = TRUE)
d2 <- data.frame(time = c(d1[, 1]), cases = c(d1[, 2]), distance = c(d1[, 3]))
# Evaluate the call inside d2 instead of attach()/detach(), so the column
# names are never placed on the search path.
with(d2, scatterplot3d(cases, distance, time, angle = 20, col.axis = "blue",
                       col.grid = "lightblue",
                       main = "Three-dimensional scatterplot",
                       pch = 21, box = FALSE, cex.symbols = 2))