# Professional Documents
# Culture Documents
library("mvtnorm")
library("simFrame")
# Simulate 8 observed variables driven by 3 latent variables, then rescale
# them to look like GRE/achievement/GPA style scores.
numberofcases <- 1000 # problem size
numberofvariables <- 8
numberoflatent <- 3
set.seed(123456) # fixed seed so the simulation is reproducible

# Structural model: weights that mix the independent latent scores.
effect <- matrix(
  c(1, 0, .7,
    0, 1, .6,
    0, 0, .39),
  nrow = numberoflatent, byrow = TRUE
)

# Measurement model: loadings of the observed variables (rows) on the latent
# variables (columns); the values are filled column by column.
model <- matrix(
  c(.9, .8, .7, 0, 0, 0, 0, 0,
    0, 0, .6, .8, .7, 0, 0, 0,
    0, 0, 0, 0, 0, .7, .6, .5),
  nrow = numberofvariables, ncol = numberoflatent, byrow = FALSE
)

# `communality` was used without ever being defined. It is the sum of squared
# loadings of each observed variable, i.e. the diagonal of model %*% t(model).
communality <- diag(model %*% t(model))
uniqueness <- 1 - communality
errorweight <- diag(sqrt(uniqueness)) # how much to weight the errors

# Independent true scores for the latent variables (numberofcases x 3).
truescores <- matrix(rnorm(numberofcases * numberoflatent), numberofcases)
round(cor(truescores), 2)

# Mix the true scores so that they reflect the structural relations.
truescores <- truescores %*% effect

# Project latent scores onto the observed variables. The original referenced
# an undefined `tmodel`; the transpose of `model` is what conforms here.
observedscore <- truescores %*% t(model)
round(cor(observedscore), 2) # true score correlation matrix (without error)

# Normal error scores, weighted per variable (numberofcases x 8).
error <- matrix(rnorm(numberofcases * numberofvariables), numberofcases)
error <- error %*% errorweight
observedscore <- observedscore + error
round(cor(observedscore), 2) # correlation matrix including error

# Give the data "realistic" properties.
GREV <- round(observedscore[, 1] * 100 + 500, 0)
GREQ <- round(observedscore[, 2] * 100 + 500, 0)
GREA <- round(observedscore[, 3] * 100 + 500, 0)
Ach <- round(observedscore[, 4] * 10 + 50, 0)
Anx <- round(-observedscore[, 5] * 10 + 50, 0) # sign flipped for this variable
Prelim <- round(observedscore[, 6] + 10, 0)
GPA <- round(observedscore[, 7] * .5 + 4, 2)
MA <- round(observedscore[, 8] * .5 + 3, 1)
data <- data.frame(GREV, GREQ, GREA, Ach, Anx, Prelim, GPA, MA)

summary(data) # basic summary statistics
round(cor(data), 2) # resulting correlations; compare with the matrix above
pairs(data)
# Contamination control object: target column GREQ, epsilon 0.2, and a
# function that multiplies the values it is given by 100.
cc <- DARContControl(
  target = "GREQ",
  epsilon = 0.2,
  fun = function(x) x * 100
)
# CONTAMINATION ----
# Example: generate a small bivariate normal sample and contaminate it.
# library() is used instead of require() so a missing package stops the script
# here instead of failing later at rmvnorm.
library("mvtnorm")
mean <- rep(0, 2)
sigma <- matrix(c(1, 0.5, 0.5, 1), 2, 2)
foo <- generate(
  size = 10, distribution = rmvnorm,
  dots = list(mean = mean, sigma = sigma)
)
# Note: this overwrites any `cc` created earlier in the script.
cc <- DARContControl(
  target = "V2",
  epsilon = 0.2, fun = function(x) x * 100
)
contaminate(foo, cc)
# MISSINGNESS MECHANISM ----
# Example: draw a sample from eusilcP and insert missing values.
# The original called data(data) and sampled from `data`, but the next line
# modifies `eusilcP` and the sampled columns (id, age, eqIncome) are referenced
# from it, so the eusilcP data set is what must be loaded and sampled.
data(eusilcP)
eusilcP$age[eusilcP$age < 0] <- 0 # this actually occurs
sam <- draw(eusilcP[, c("id", "age", "eqIncome")], size = 20)
# missing at random: `age` is passed as the auxiliary variable
marc <- NAControl(target = "eqIncome", NArate = 0.2, aux = "age")
setNA(sam, marc)
# The console prompts ("R> ") were pasted along with the commands and are not
# valid R; they are removed here so the commands can be run.
library("rrcovNA")
data("bush10")
## Compute MCD estimates for the modified bushfire data set
## - show() and summary() examples
mcd <- CovNAMcd(bush10)
mcd
## Pasted console output (truncated in the source), kept for reference:
## Call:
## CovNAMcd(x = bus
# Simulate 8 observed variables (V1..V8) from 3 latent variables.
# Uses numberofcases, numberofvariables and numberoflatent set earlier in the
# script.

# Structural model: weights that mix the independent latent scores.
effect <- matrix(
  c(1, 0, .7,
    0, 1, .6,
    0, 0, .39),
  nrow = numberoflatent, byrow = TRUE
)

# Measurement model: loadings of the observed variables (rows) on the latent
# variables (columns); the values are filled column by column.
model <- matrix(
  c(.9, .8, .7, 0, 0, 0, 0, 0,
    0, 0, .6, .8, .7, 0, 0, 0,
    0, 0, 0, 0, 0, .7, .6, .5),
  nrow = numberofvariables, ncol = numberoflatent, byrow = FALSE
)

# `communality` was used without ever being defined. It is the sum of squared
# loadings of each observed variable, i.e. the diagonal of model %*% t(model).
communality <- diag(model %*% t(model))
uniqueness <- 1 - communality
errorweight <- diag(sqrt(uniqueness)) # how much to weight the errors

# Independent true scores for the latent variables (numberofcases x 3).
truescores <- matrix(rnorm(numberofcases * numberoflatent), numberofcases)
round(cor(truescores), 2)

# Mix the true scores so that they reflect the structural relations.
truescores <- truescores %*% effect

# The original referenced an undefined `tmodel`; the transpose of `model` is
# what conforms with the latent score matrix.
observedscore <- truescores %*% t(model)
round(cor(observedscore), 2) # true score correlation matrix (without error)

# Normal error scores, weighted per variable (numberofcases x 8).
error <- matrix(rnorm(numberofcases * numberofvariables), numberofcases)
error <- error %*% errorweight
observedscore <- observedscore + error
round(cor(observedscore), 2) # correlation matrix including error

# Give the data "realistic" properties.
V1 <- round(observedscore[, 1] * 100 + 500, 0)
V2 <- round(observedscore[, 2] * 100 + 500, 0)
V3 <- round(observedscore[, 3] * 100 + 500, 0)
V4 <- round(observedscore[, 4] * 10 + 50, 0)
V5 <- round(-observedscore[, 5] * 10 + 50, 0) # sign flipped for this variable
V6 <- round(observedscore[, 6] + 10, 0)
V7 <- round(observedscore[, 7] * .5 + 4, 2)
V8 <- round(observedscore[, 8] * .5 + 3, 1)
simdata <- data.frame(V1, V2, V3, V4, V5, V6, V7, V8)

summary(simdata) # basic summary statistics
round(cor(simdata), 2) # resulting correlations; compare with the matrix above
data <- simdata
# Contamination control: target column V1, epsilon 0.5, and a function that
# multiplies the values it is given by 100.
cc <- DARContControl(
  target = "V1",
  epsilon = 0.5,
  fun = function(x) x * 100
)
# Apply the control to the simulated data and show the result.
contdata <- contaminate(simdata, cc)
contdata
# From the contaminated complete data set, a percentage of the values is set
# to missing under each of three missingness mechanisms: missing completely at
# random (MCAR), missing at random (MAR), and missing not at random (MNAR).
set.seed(12345)
# The original line `data=contdata)` had a stray closing parenthesis and could
# not be parsed.
data <- contdata
# missing at random: V5 is passed as the auxiliary variable
marc <- NAControl(target = "V1", NArate = 0.2, aux = "V5")
# The original passed `sam`, the eusilcP sample (columns id, age, eqIncome),
# which has neither V1 nor V5; the contaminated simulated data is used instead.
setNA(data, marc)
# MCD estimates with rrcovNA.
library("rrcovNA")
# NOTE(review): `pool` is not defined anywhere in the visible script - confirm
# where it comes from.
data <- pool
# NOTE(review): `stardata` is likewise not defined in the visible script, and
# the object assigned to `data` above is not the one passed here - confirm.
mcd <- CovNAMcd(stardata)
mcd
# Simulate 6 observed variables (V1..V6) from 3 latent variables.
# Uses numberofcases and numberoflatent set earlier in the script.

# Structural model: weights that mix the independent latent scores.
effect <- matrix(
  c(1, 0, .7,
    0, 1, .6,
    0, 0, .39),
  nrow = numberoflatent, byrow = TRUE
)

# Measurement model: 18 loadings = 6 observed variables x 3 latent variables,
# filled column by column. The original forced nrow = numberofvariables (8 at
# the top of the script), which does not match the 18 values; the number of
# rows is now derived from the data.
model <- matrix(
  c(1.332, 0.443, 1.46, 0.175, 0.015, 0.014,
    1.328, 1.771, 0.586, 0.190, 0.059, 0.015,
    1.317, 0.048, 1.903, 0.618, 0.190, 0.48),
  ncol = numberoflatent, byrow = FALSE
)

# `communality` was used without ever being defined: sum of squared loadings
# of each observed variable.
communality <- diag(model %*% t(model))
# NOTE(review): here the error variance is set equal to the communality (not
# 1 - communality as in the earlier simulations); presumably because the
# loadings exceed 1 - confirm this is intended.
uniqueness <- communality
errorweight <- diag(sqrt(uniqueness)) # how much to weight the errors

# Independent true scores for the latent variables (numberofcases x 3).
truescores <- matrix(rnorm(numberofcases * numberoflatent), numberofcases)
round(cor(truescores), 2)

# Mix the true scores so that they reflect the structural relations.
truescores <- truescores %*% effect

# The original referenced an undefined `tmodel`; the transpose of `model` is
# what conforms with the latent score matrix.
observedscore <- truescores %*% t(model)
round(cor(observedscore), 2) # true score correlation matrix (without error)

# Normal error scores, weighted per variable (numberofcases x 6).
error <- matrix(rnorm(numberofcases * nrow(model)), numberofcases)
error <- error %*% errorweight
# The original wrote `(observedscore) <- ...`, which is not a valid assignment
# target in R.
observedscore <- observedscore + error
round(cor(observedscore), 2) # correlation matrix including error

# Give the data "realistic" properties.
V1 <- round(observedscore[, 1] * 100 + 500, 0)
V2 <- round(observedscore[, 2] * 100 + 500, 0)
V3 <- round(observedscore[, 3] * 10 + 50, 0)
V4 <- round(-observedscore[, 4] * 10 + 50, 0) # sign flipped for this variable
V5 <- round(observedscore[, 5] * 5 + 10, 0)
V6 <- round(observedscore[, 6] * .5 + 4, 2)
simdata <- data.frame(V1, V2, V3, V4, V5, V6)

summary(simdata) # basic summary statistics
round(cor(simdata), 2) # resulting correlations; compare with the matrix above
pairs(simdata)
data <- simdata
# Contamination control: target column V1, epsilon 0.5, and a function that
# multiplies the values it is given by 100.
cc <- DARContControl(
  target = "V1",
  epsilon = 0.5,
  fun = function(x) x * 100
)
# Apply the control to the simulated data and show the result.
contdata <- contaminate(simdata, cc)
contdata
# From the contaminated complete data set, a percentage of the values is set
# to missing under each of three missingness mechanisms: missing completely at
# random (MCAR), missing at random (MAR), and missing not at random (MNAR).
set.seed(12345)
# The original line `data=contdata)` had a stray closing parenthesis and could
# not be parsed.
data <- contdata
# missing at random: V5 is passed as the auxiliary variable
marc <- NAControl(target = "V1", NArate = 0.2, aux = "V5")
# The original passed `sam`, the eusilcP sample (columns id, age, eqIncome),
# which has neither V1 nor V5; the contaminated simulated data is used instead.
setNA(data, marc)
# MCD estimates with rrcovNA.
library("rrcovNA")
# NOTE(review): `pool` is not defined anywhere in the visible script - confirm
# where it comes from.
data <- pool
# NOTE(review): `stardata` is likewise not defined in the visible script, and
# the object assigned to `data` above is not the one passed here - confirm.
mcd <- CovNAMcd(stardata)
mcd
# Simulate 6 observed variables (V1..V6) from 3 latent variables.
# Uses numberofcases and numberoflatent set earlier in the script.

# Structural model: weights that mix the independent latent scores.
effect <- matrix(
  c(1, 0, .7,
    0, 1, .6,
    0, 0, .39),
  nrow = numberoflatent, byrow = TRUE
)

# Measurement model: 18 loadings = 6 observed variables x 3 latent variables,
# filled column by column. The original forced nrow = numberofvariables (8 at
# the top of the script), which does not match the 18 values; the number of
# rows is now derived from the data.
model <- matrix(
  c(1.332, 0.443, 1.46, 0.175, 0.015, 0.014,
    1.328, 1.771, 0.586, 0.190, 0.059, 0.015,
    1.317, 0.048, 1.903, 0.618, 0.190, 0.48),
  ncol = numberoflatent, byrow = FALSE
)

# `communality` was used without ever being defined: sum of squared loadings
# of each observed variable.
communality <- diag(model %*% t(model))
# NOTE(review): here the error variance is set equal to the communality (not
# 1 - communality as in the earlier simulations); presumably because the
# loadings exceed 1 - confirm this is intended.
uniqueness <- communality
errorweight <- diag(sqrt(uniqueness)) # how much to weight the errors

# Independent true scores for the latent variables (numberofcases x 3).
truescores <- matrix(rnorm(numberofcases * numberoflatent), numberofcases)
round(cor(truescores), 2)

# Mix the true scores so that they reflect the structural relations.
truescores <- truescores %*% effect

# The original referenced an undefined `tmodel`; the transpose of `model` is
# what conforms with the latent score matrix.
observedscore <- truescores %*% t(model)
round(cor(observedscore), 2) # true score correlation matrix (without error)

# Normal error scores, weighted per variable (numberofcases x 6).
error <- matrix(rnorm(numberofcases * nrow(model)), numberofcases)
error <- error %*% errorweight
# The original wrote `(observedscore) <- ...`, which is not a valid assignment
# target in R.
observedscore <- observedscore + error
round(cor(observedscore), 2) # correlation matrix including error

# Give the data "realistic" properties.
V1 <- round(observedscore[, 1] * 100 + 500, 0)
V2 <- round(observedscore[, 2] * 100 + 500, 0)
V3 <- round(observedscore[, 3] * 10 + 50, 0)
V4 <- round(-observedscore[, 4] * 10 + 50, 0) # sign flipped for this variable
V5 <- round(observedscore[, 5] * 5 + 10, 0)
V6 <- round(observedscore[, 6] * .5 + 4, 2)
simdata <- data.frame(V1, V2, V3, V4, V5, V6)

summary(simdata) # basic summary statistics
round(cor(simdata), 2) # resulting correlations; compare with the matrix above
pairs(simdata)
data <- simdata
# Contamination control: target column V1, epsilon 0.5, and a function that
# multiplies the values it is given by 100.
cc <- DARContControl(
  target = "V1",
  epsilon = 0.5,
  fun = function(x) x * 100
)
# Apply the control to the simulated data and show the result.
contdata <- contaminate(simdata, cc)
contdata
# From the contaminated complete data set, a percentage of the values is set
# to missing under each of three missingness mechanisms: missing completely at
# random (MCAR), missing at random (MAR), and missing not at random (MNAR).
set.seed(12345)
# The original line `data=contdata)` had a stray closing parenthesis and could
# not be parsed.
data <- contdata
# missing at random: V5 is passed as the auxiliary variable
marc <- NAControl(target = "V1", NArate = 0.2, aux = "V5")
# The original passed `sam`, the eusilcP sample (columns id, age, eqIncome),
# which has neither V1 nor V5; the contaminated simulated data is used instead.
setNA(data, marc)
# Simulate 5 observed variables (V1..V5) from 3 latent variables.
# Uses numberofcases and numberoflatent set earlier in the script.

# Structural model: weights that mix the independent latent scores.
effect <- matrix(
  c(1, 0, .7,
    0, 1, .6,
    0, 0, .39),
  nrow = numberoflatent, byrow = TRUE
)

# Measurement model: 15 loadings = 5 observed variables x 3 latent variables,
# filled column by column. The original forced nrow = numberofvariables (8 at
# the top of the script), which does not match the 15 values; the number of
# rows is now derived from the data, matching the 5 columns kept in `data`.
model <- matrix(
  c(5, 1, 0, 5, 1,
    4, 2, 3, 4, 2,
    9, 1, 1, 9, 1),
  ncol = numberoflatent, byrow = FALSE
)

# `communality` was used without ever being defined: sum of squared loadings
# of each observed variable.
communality <- diag(model %*% t(model))
# NOTE(review): the error variance is set equal to the communality (not
# 1 - communality); presumably because the loadings exceed 1 - confirm.
uniqueness <- communality
errorweight <- diag(sqrt(uniqueness)) # how much to weight the errors

# Independent true scores for the latent variables (numberofcases x 3).
truescores <- matrix(rnorm(numberofcases * numberoflatent), numberofcases)
round(cor(truescores), 2)

# Mix the true scores so that they reflect the structural relations.
truescores <- truescores %*% effect

# The original referenced an undefined `tmodel`; the transpose of `model` is
# what conforms with the latent score matrix.
observedscore <- truescores %*% t(model)
round(cor(observedscore), 2) # true score correlation matrix (without error)

# Normal error scores, weighted per variable (numberofcases x 5).
error <- matrix(rnorm(numberofcases * nrow(model)), numberofcases)
error <- error %*% errorweight
observedscore <- observedscore + error
round(cor(observedscore), 2) # correlation matrix including error

# Give the data "realistic" properties. The original also computed V6..V8
# from columns 6..8, which do not exist for five variables and were never put
# into `data`; those assignments are dropped.
V1 <- round(observedscore[, 1] * 100 + 500, 0)
V2 <- round(observedscore[, 2] * 100 + 500, 0)
V3 <- round(observedscore[, 3] * 100 + 500, 0)
V4 <- round(observedscore[, 4] * 10 + 50, 0)
V5 <- round(-observedscore[, 5] * 10 + 50, 0) # sign flipped for this variable
data <- data.frame(V1, V2, V3, V4, V5)

summary(data) # basic summary statistics
round(cor(data), 2) # resulting correlations; compare with the matrix above
pairs(data)
# 4.15.3 Adding Contamination
# Having simulated the multivariate data according to the simulation design,
# the next step is to contaminate the data sets to the varying degrees that
# the design specifies.
# Contaminate column V1 of `data` (epsilon 0.2, values multiplied by 100).
library(mvtnorm)
set.seed(12345)
data <- data # self-assignment kept from the original
cc <- DARContControl(
  target = "V1",
  epsilon = 0.2,
  fun = function(x) x * 100
)
bar <- contaminate(data, cc)
bar
# Plots the free-standing vector V1, not the V1 column of `bar`.
plot(V1)
# From the contaminated complete data set, a percentage of the values is set
# to missing under each of three missingness mechanisms: missing completely at
# random (MCAR), missing at random (MAR), and missing not at random (MNAR).
#missing completely at random mechanism
# MCD estimates with rrcovNA.
library("rrcovNA")
# NOTE(review): `pool` is not defined anywhere in the visible script - confirm
# where it comes from.
data <- pool
# NOTE(review): `stardata` is likewise not defined in the visible script, and
# the object assigned to `data` above is not the one passed here - confirm.
mcd <- CovNAMcd(stardata)
mcd