You are on page 1 of 11

library(simFrame)

library("mvtnorm")
numberofcases <- 1000 # problem size
numberofvariables <- 8
numberoflatent <- 3
set.seed(123456) # fixed seed so the simulated data are reproducible

# The simulation code

# structural model: 3 x 3 matrix that the latent scores are post-multiplied by
effect <- matrix(c(1, 0, .7,
                   0, 1, .6,
                   0, .0, .39), nrow = numberoflatent, byrow = TRUE)

# measurement model: filled column-wise, so each row of numbers below is the
# loading column of one latent variable (8 variables x 3 latent variables)
model <- matrix(c(.9, .8, .7, 0, 0, 0, 0, 0,
                  0, 0, .6, .8, .7, 0, 0, 0,
                  0, 0, 0, 0, 0, .7, .6, .5),
                nrow = numberofvariables, ncol = numberoflatent, byrow = FALSE)

tmodel <- t(model) # transpose of model

model %*% tmodel # show the resulting latent structure

# find how much to weight true scores and errors given the measurement model
communality <- diag(model %*% tmodel)

uniqueness <- 1 - communality
errorweight <- sqrt(uniqueness)
errorweight <- diag(errorweight) # how much to weight the errors

# create true scores for the latent variables (numberofcases x numberoflatent)
truescores <- matrix(rnorm(numberofcases * numberoflatent), numberofcases)
round(cor(truescores), 2)
# create true scores that reflect the structural relations
truescores <- truescores %*% effect

observedscore <- truescores %*% tmodel
# show the true score correlation matrix (without error)
round(cor(observedscore), 2)
# create normal error scores (numberofcases x numberofvariables)
error <- matrix(rnorm(numberofcases * numberofvariables), numberofcases)
error <- error %*% errorweight
observedscore <- observedscore + error
round(cor(observedscore), 2) # show the correlation matrix

# give the data "realistic" properties
GREV <- round(observedscore[, 1] * 100 + 500, 0)
GREQ <- round(observedscore[, 2] * 100 + 500, 0)
GREA <- round(observedscore[, 3] * 100 + 500, 0)
Ach <- round(observedscore[, 4] * 10 + 50, 0)
Anx <- round(-observedscore[, 5] * 10 + 50, 0) # note the reversed sign
Prelim <- round(observedscore[, 6] + 10, 0)
GPA <- round(observedscore[, 7] * .5 + 4, 2)
MA <- round(observedscore[, 8] * .5 + 3, 1)

data <- data.frame(GREV, GREQ, GREA, Ach, Anx, Prelim, GPA, MA)
summary(data) # basic summary statistics
round(cor(data), 2) # show the resulting correlations
# apart from rounding and the reversed sign of Anx, these should closely
# match the previous correlation matrix
pairs(data)

# Contamination control for column GREQ with epsilon = 0.2; the supplied
# function multiplies the values it is given by 100.
# (The original `data=data` self-assignment was a no-op and is dropped.)
cc <- DARContControl(target = "GREQ", epsilon = 0.2,
                     fun = function(x) x * 100)

bar <- contaminate(data, cc)

bar # print the contaminated data (the original line was truncated to `ba`)

# Missing-value control with NArate = 0.3 and no explicit target.
nc <- NAControl(NArate = 0.3)

setNA(data, nc) # printed only; the result is not stored

CONTAMINATION

# library() stops with an error when the package is missing, whereas
# require() only returns FALSE and lets the script fail later at rmvnorm.
library(mvtnorm)
mean <- rep(0, 2)
sigma <- matrix(c(1, 0.5, 0.5, 1), 2, 2)
# generate 10 observations with rmvnorm, passing mean and sigma through `dots`
foo <- generate(size = 10, distribution = rmvnorm,
                dots = list(mean = mean, sigma = sigma))
# contamination control for column V2 with epsilon = 0.2; fun multiplies by 100
cc <- DARContControl(target = "V2",
                     epsilon = 0.2, fun = function(x) x * 100)
contaminate(foo, cc)

MISSINGNESS MECHANISM

# Load the eusilcP example data; every following line works on eusilcP, so
# the original `data(data)` / `draw(data[...])` could not have run.
data(eusilcP)
eusilcP$age[eusilcP$age < 0] <- 0 # this actually occurs
sam <- draw(eusilcP[, c("id", "age", "eqIncome")], size = 20)

## using control objects

# missing completely at random
mcarc <- NAControl(target = "eqIncome", NArate = 0.2)
setNA(sam, mcarc)

# missing at random
marc <- NAControl(target = "eqIncome", NArate = 0.2, aux = "age")
setNA(sam, marc)

# missing not at random (the auxiliary variable is the target itself)
mnarc <- NAControl(target = "eqIncome",
                   NArate = 0.2, aux = "eqIncome")
setNA(sam, mnarc)

## supply slots of control object as arguments

# missing completely at random
setNA(sam, target = "eqIncome", NArate = 0.2)

# missing at random
setNA(sam, target = "eqIncome", NArate = 0.2, aux = "age")

# missing not at random
setNA(sam, target = "eqIncome", NArate = 0.2, aux = "eqIncome")

Method: Minimum Covariance Determinant Estimator for incomplete data.

R> library("rrcovNA")
R> data("bush10")
R> ## Compute MCD estimates for the modified bushfire data set
R> ## - show() and summary() examples
R> mcd <- CovNAMcd(bush10)
R> mcd
Call:
CovNAMcd(x = bush10)

4.15.1 Setup for the R-code

numberofcases <- 1000 # problem size
numberofvariables <- 8
numberoflatent <- 3
set.seed(123456) # fixed seed so the simulated data are reproducible

#4.15.2 The simulation code

# structural model: 3 x 3 matrix that the latent scores are post-multiplied by
effect <- matrix(c(1, 0, .7,
                   0, 1, .6,
                   0, .0, .39), nrow = numberoflatent, byrow = TRUE)

# measurement model: filled column-wise, so each row of numbers below is the
# loading column of one latent variable (8 variables x 3 latent variables)
model <- matrix(c(.9, .8, .7, 0, 0, 0, 0, 0,
                  0, 0, .6, .8, .7, 0, 0, 0,
                  0, 0, 0, 0, 0, .7, .6, .5),
                nrow = numberofvariables, ncol = numberoflatent, byrow = FALSE)

tmodel <- t(model) # transpose of model

model %*% tmodel # show the resulting latent structure

# find how much to weight true scores and errors given the measurement model
communality <- diag(model %*% tmodel)

uniqueness <- 1 - communality
errorweight <- sqrt(uniqueness)
errorweight <- diag(errorweight) # how much to weight the errors

# create true scores for the latent variables (numberofcases x numberoflatent)
truescores <- matrix(rnorm(numberofcases * numberoflatent), numberofcases)
round(cor(truescores), 2)
# create true scores that reflect the structural relations
truescores <- truescores %*% effect

observedscore <- truescores %*% tmodel
# show the true score correlation matrix (without error)
round(cor(observedscore), 2)
# create normal error scores (numberofcases x numberofvariables)
error <- matrix(rnorm(numberofcases * numberofvariables), numberofcases)
error <- error %*% errorweight
observedscore <- observedscore + error
round(cor(observedscore), 2) # show the correlation matrix

# give the data "realistic" properties
V1 <- round(observedscore[, 1] * 100 + 500, 0)
V2 <- round(observedscore[, 2] * 100 + 500, 0)
V3 <- round(observedscore[, 3] * 100 + 500, 0)
V4 <- round(observedscore[, 4] * 10 + 50, 0)
V5 <- round(-observedscore[, 5] * 10 + 50, 0) # note the reversed sign
V6 <- round(observedscore[, 6] + 10, 0)
V7 <- round(observedscore[, 7] * .5 + 4, 2)
V8 <- round(observedscore[, 8] * .5 + 3, 1)

simdata <- data.frame(V1, V2, V3, V4, V5, V6, V7, V8)
summary(simdata) # basic summary statistics
round(cor(simdata), 2) # show the resulting correlations
# apart from rounding and the reversed sign of V5, these should closely
# match the previous correlation matrix

4.15.3 Adding Contamination


Having successfully simulated the multivariate data according to the simulation
design under consideration, the next step is to contaminate the data sets to
varying degrees, as specified by the simulation design.

# keep the simulated data available under the name `data` as well
data <- simdata
# contamination control: target column V1, epsilon 0.5, and a function that
# multiplies the values it receives by 100
cc <- DARContControl(
  target = "V1",
  epsilon = 0.5,
  fun = function(x) x * 100
)
# apply the control to the simulated data and show the result
contdata <- contaminate(simdata, cc)
contdata

4.15.4 Inserting Missing Values

From the simulated complete data set, which has been contaminated, some percentage
of the data was set missing under each of the three missingness mechanisms:
missing completely at random, missing at random, and missing not at random.

set.seed(12345)

# the original line `data=contdata)` had an unmatched ")" and did not parse
data <- contdata

## using control objects

# draw a sample with as many rows as contdata (the original hard-coded 1000,
# the number of simulated cases in this set-up)
sam <- draw(contdata, size = nrow(contdata))

# missing completely at random
mcarc <- NAControl(target = "V1", NArate = 0.2)
setNA(sam, mcarc) # printed only; the result is not stored

# missing at random
marc <- NAControl(target = "V1", NArate = 0.2, aux = "V5")
setNA(sam, marc) # printed only; the result is not stored

# missing not at random (the auxiliary variable is the target itself)
mnarc <- NAControl(target = "V1",
                   NArate = 0.2, aux = "V1")
moon <- setNA(sam, mnarc) # this result is kept for the later steps

4.15.5 Robust Imputation


# irmi() is provided by VIM, which this file otherwise attaches only near the
# very end; attach it here so the call below can be resolved.
library(VIM)
# keep the first eight columns of moon
# NOTE(review): presumably these are V1..V8 and any further columns are
# bookkeeping columns added by simFrame -- confirm.
stardata <- data.frame(moon[, 1:8])
pool <- irmi(stardata)

4.15.6 Robust Estimation of Location and Scale

Method: Minimum Covariance Determinant Estimator for incomplete data.

library(rrcovNA)
data <- pool
## MCD estimates for the incomplete data held in stardata;
## evaluating `mcd` on its own line prints the fitted object
mcd <- CovNAMcd(stardata)
mcd

4.15.1 Setup for the R-code

numberofcases <- 500 # problem size
numberofvariables <- 6
numberoflatent <- 3
#set.seed(123456) # seed left disabled, so every run draws different data

#4.15.2 The simulation code

# structural model: 3 x 3 matrix that the latent scores are post-multiplied by
effect <- matrix(c(1, 0, .7,
                   0, 1, .6,
                   0, .0, .39), nrow = numberoflatent, byrow = TRUE)

# measurement model: filled column-wise, so each row of numbers below is the
# loading column of one latent variable (6 variables x 3 latent variables)
model <- matrix(c(1.332, 0.443, 1.46, 0.175, 0.015, 0.014,
                  1.328, 1.771, 0.586, 0.190, 0.059, 0.015,
                  1.317, 0.048, 1.903, 0.618, 0.190, 0.48),
                nrow = numberofvariables, ncol = numberoflatent, byrow = FALSE)

tmodel <- t(model) # transpose of model

# show the resulting latent structure; the original wrapped this in solve(),
# but a 6 x 6 product of a 6 x 3 matrix has rank <= 3 and cannot be inverted
model %*% tmodel

# find how much to weight true scores and errors given the measurement model
communality <- diag(model %*% tmodel)

# the error variance is set equal to the communality here; several loadings
# exceed 1, so 1 - communality would be negative and sqrt() would give NaN
uniqueness <- communality
errorweight <- sqrt(uniqueness)
errorweight <- diag(errorweight) # how much to weight the errors

# create true scores for the latent variables (numberofcases x numberoflatent)
truescores <- matrix(rnorm(numberofcases * numberoflatent), numberofcases)
round(cor(truescores), 2)
# create true scores that reflect the structural relations
truescores <- truescores %*% effect

observedscore <- truescores %*% tmodel
# show the true score correlation matrix (without error)
round(cor(observedscore), 2)
# create normal error scores (numberofcases x numberofvariables)
error <- matrix(rnorm(numberofcases * numberofvariables), numberofcases)
error <- error %*% errorweight
# plain assignment: `(observedscore) <- ...` is not a valid assignment target
observedscore <- observedscore + error
round(cor(observedscore), 2) # show the correlation matrix

# give the data "realistic" properties
V1 <- round(observedscore[, 1] * 100 + 500, 0)
V2 <- round(observedscore[, 2] * 100 + 500, 0)
V3 <- round(observedscore[, 3] * 10 + 50, 0)
V4 <- round(-observedscore[, 4] * 10 + 50, 0) # note the reversed sign
V5 <- round(observedscore[, 5] * 5 + 10, 0)
V6 <- round(observedscore[, 6] * .5 + 4, 2)

simdata <- data.frame(V1, V2, V3, V4, V5, V6)
summary(simdata) # basic summary statistics
round(cor(simdata), 2) # show the resulting correlations
# apart from rounding and the reversed sign of V4, these should closely
# match the previous correlation matrix
pairs(simdata)

4.15.3 Adding Contamination


Having successfully simulated the multivariate data according to the simulation
design under consideration, the next step is to contaminate the data sets to
varying degrees, as specified by the simulation design.

# keep the simulated data available under the name `data` as well
data <- simdata
# contamination control: target column V1, epsilon 0.5, and a function that
# multiplies the values it receives by 100
cc <- DARContControl(
  target = "V1",
  epsilon = 0.5,
  fun = function(x) x * 100
)
# apply the control to the simulated data and show the result
contdata <- contaminate(simdata, cc)
contdata

4.15.4 Inserting Missing Values

From the simulated complete data set, which has been contaminated, some percentage
of the data was set missing under each of the three missingness mechanisms:
missing completely at random, missing at random, and missing not at random.

set.seed(12345)

# the original line `data=contdata)` had an unmatched ")" and did not parse
data <- contdata

## using control objects

# draw a sample with as many rows as contdata; the original hard-coded
# size = 1000, which is more than the 500 cases simulated in this set-up
sam <- draw(contdata, size = nrow(contdata))

# missing completely at random
mcarc <- NAControl(target = "V1", NArate = 0.2)
setNA(sam, mcarc) # printed only; the result is not stored

# missing at random
marc <- NAControl(target = "V1", NArate = 0.2, aux = "V5")
setNA(sam, marc) # printed only; the result is not stored

# missing not at random (the auxiliary variable is the target itself)
mnarc <- NAControl(target = "V1",
                   NArate = 0.2, aux = "V1")
moon <- setNA(sam, mnarc) # this result is kept for the later steps

4.15.5 Robust Imputation


# irmi() is provided by VIM, which this file otherwise attaches only near the
# very end; attach it here so the call below can be resolved.
library(VIM)
# this set-up simulates six variables (V1..V6), so the original `moon[,1:8]`
# asked for columns that do not exist; select the six variables by name
stardata <- data.frame(moon[, paste0("V", 1:6)])
pool <- irmi(stardata)

4.15.6 Robust Estimation of Location and Scale

Method: Minimum Covariance Determinant Estimator for incomplete data.

library(rrcovNA)
data <- pool
## MCD estimates for the incomplete data held in stardata;
## evaluating `mcd` on its own line prints the fitted object
mcd <- CovNAMcd(stardata)
mcd

4.15.1 Setup for the R-code

numberofcases <- 500 # problem size
numberofvariables <- 6
numberoflatent <- 3
#set.seed(123456) # seed left disabled, so every run draws different data

#4.15.2 The simulation code

# structural model: 3 x 3 matrix that the latent scores are post-multiplied by
effect <- matrix(c(1, 0, .7,
                   0, 1, .6,
                   0, .0, .39), nrow = numberoflatent, byrow = TRUE)

# measurement model: filled column-wise, so each row of numbers below is the
# loading column of one latent variable (6 variables x 3 latent variables)
model <- matrix(c(1.332, 0.443, 1.46, 0.175, 0.015, 0.014,
                  1.328, 1.771, 0.586, 0.190, 0.059, 0.015,
                  1.317, 0.048, 1.903, 0.618, 0.190, 0.48),
                nrow = numberofvariables, ncol = numberoflatent, byrow = FALSE)

tmodel <- t(model) # transpose of model

# show the resulting latent structure; the original wrapped this in solve(),
# but a 6 x 6 product of a 6 x 3 matrix has rank <= 3 and cannot be inverted
model %*% tmodel

# find how much to weight true scores and errors given the measurement model
communality <- diag(model %*% tmodel)

# the error variance is set equal to the communality here; several loadings
# exceed 1, so 1 - communality would be negative and sqrt() would give NaN
uniqueness <- communality
errorweight <- sqrt(uniqueness)
errorweight <- diag(errorweight) # how much to weight the errors

# create true scores for the latent variables (numberofcases x numberoflatent)
truescores <- matrix(rnorm(numberofcases * numberoflatent), numberofcases)
round(cor(truescores), 2)
# create true scores that reflect the structural relations
truescores <- truescores %*% effect

observedscore <- truescores %*% tmodel
# show the true score correlation matrix (without error)
round(cor(observedscore), 2)
# create normal error scores (numberofcases x numberofvariables)
error <- matrix(rnorm(numberofcases * numberofvariables), numberofcases)
error <- error %*% errorweight
# plain assignment: `(observedscore) <- ...` is not a valid assignment target
observedscore <- observedscore + error
round(cor(observedscore), 2) # show the correlation matrix

# give the data "realistic" properties
V1 <- round(observedscore[, 1] * 100 + 500, 0)
V2 <- round(observedscore[, 2] * 100 + 500, 0)
V3 <- round(observedscore[, 3] * 10 + 50, 0)
V4 <- round(-observedscore[, 4] * 10 + 50, 0) # note the reversed sign
V5 <- round(observedscore[, 5] * 5 + 10, 0)
V6 <- round(observedscore[, 6] * .5 + 4, 2)

simdata <- data.frame(V1, V2, V3, V4, V5, V6)
summary(simdata) # basic summary statistics
round(cor(simdata), 2) # show the resulting correlations
# apart from rounding and the reversed sign of V4, these should closely
# match the previous correlation matrix
pairs(simdata)

4.15.3 Adding Contamination


Having successfully simulated the multivariate data according to the simulation
design under consideration, the next step is to contaminate the data sets to
varying degrees, as specified by the simulation design.

# keep the simulated data available under the name `data` as well
data <- simdata
# contamination control: target column V1, epsilon 0.5, and a function that
# multiplies the values it receives by 100
cc <- DARContControl(
  target = "V1",
  epsilon = 0.5,
  fun = function(x) x * 100
)
# apply the control to the simulated data and show the result
contdata <- contaminate(simdata, cc)
contdata

4.15.4 Inserting Missing Values

From the simulated complete data set, which has been contaminated, some percentage
of the data was set missing under each of the three missingness mechanisms:
missing completely at random, missing at random, and missing not at random.

set.seed(12345)

# the original line `data=contdata)` had an unmatched ")" and did not parse
data <- contdata

## using control objects

# draw a sample with as many rows as contdata; the original hard-coded
# size = 1000, which is more than the 500 cases simulated in this set-up
sam <- draw(contdata, size = nrow(contdata))

# missing completely at random
mcarc <- NAControl(target = "V1", NArate = 0.2)
setNA(sam, mcarc) # printed only; the result is not stored

# missing at random
marc <- NAControl(target = "V1", NArate = 0.2, aux = "V5")
setNA(sam, marc) # printed only; the result is not stored

# missing not at random (the auxiliary variable is the target itself)
mnarc <- NAControl(target = "V1",
                   NArate = 0.2, aux = "V1")
moon <- setNA(sam, mnarc) # this result is kept for the later steps

numberofcases <- 1000 # problem size
numberofvariables <- 5
numberoflatent <- 3
set.seed(123456) # fixed seed so the simulated data are reproducible

#4.15.2 The simulation code

# structural model: 3 x 3 matrix that the latent scores are post-multiplied by
effect <- matrix(c(1, 0, .7,
                   0, 1, .6,
                   0, .0, .39), nrow = numberoflatent, byrow = TRUE)

# measurement model: filled column-wise, so each row of numbers below is the
# loading column of one latent variable (5 variables x 3 latent variables)
model <- matrix(c(5, 1, 0, 5, 1,
                  4, 2, 3, 4, 2,
                  9, 1, 1, 9, 1),
                nrow = numberofvariables, ncol = numberoflatent, byrow = FALSE)

tmodel <- t(model) # transpose of model

(model %*% tmodel) # show the resulting latent structure

# find how much to weight true scores and errors given the measurement model
communality <- diag(model %*% tmodel)

# the error variance is set equal to the communality here; the loadings are
# far above 1, so 1 - communality would be negative and sqrt() would give NaN
uniqueness <- communality
errorweight <- sqrt(uniqueness)
errorweight <- diag(errorweight) # how much to weight the errors

# create true scores for the latent variables (numberofcases x numberoflatent)
truescores <- matrix(rnorm(numberofcases * numberoflatent), numberofcases)
round(cor(truescores), 2)
# create true scores that reflect the structural relations
truescores <- truescores %*% effect

observedscore <- truescores %*% tmodel
# show the true score correlation matrix (without error)
round(cor(observedscore), 2)
# create normal error scores (numberofcases x numberofvariables)
error <- matrix(rnorm(numberofcases * numberofvariables), numberofcases)
error <- error %*% errorweight
observedscore <- observedscore + error
round(cor(observedscore), 2) # show the correlation matrix

# give the data "realistic" properties
# observedscore has only five columns here, so the original V6..V8 lines
# (columns 6 to 8) were out of bounds; they were never used in `data` anyway
V1 <- round(observedscore[, 1] * 100 + 500, 0)
V2 <- round(observedscore[, 2] * 100 + 500, 0)
V3 <- round(observedscore[, 3] * 100 + 500, 0)
V4 <- round(observedscore[, 4] * 10 + 50, 0)
V5 <- round(-observedscore[, 5] * 10 + 50, 0) # note the reversed sign

data <- data.frame(V1, V2, V3, V4, V5)
summary(data) # basic summary statistics
round(cor(data), 2) # show the resulting correlations
# apart from rounding and the reversed sign of V5, these should closely
# match the previous correlation matrix
pairs(data)
#4.15.3 Adding Contamination
#Having successfully simulated the multivariate data according to the simulation
#design under consideration, the next thing is to get the datasets contaminated at
#varying degrees according to the simulation design.

library(mvtnorm)
set.seed(12345)
# (the original `data=data` self-assignment was a no-op and is dropped)
# contamination control for column V1 with epsilon = 0.2; fun multiplies by 100
cc <- DARContControl(target = "V1", epsilon = 0.2,
                     fun = function(x) x * 100)
bar <- contaminate(data, cc)
bar
# NOTE(review): this plots the original V1 vector, not the contaminated
# bar$V1 -- confirm which one is intended.
plot(V1)

#4.15.4 Inserting Missing Values

#From the simulated complete data set which has been contaminated, some percentages
#of the data were set missing under the three missingness mechanisms: missing
#completely at random, missing at random and missing not at random.
#missing completely at random mechanism

# missing-value control with NArate = 0.3 and no explicit target
nc <- NAControl(NArate = 0.3)

# insert the missing values into the contaminated data and keep the result
moon <- setNA(bar, nc)

library("rrcovNA")
## Compute MCD estimates for the incomplete data set `moon` created just above.
## The original lines used `pool` and `stardata`, which this 5-variable run
## never defines (they belong to the other simulation set-ups in this file).
## NOTE(review): only V1..V5 are passed on, assuming any other columns in
## `moon` are bookkeeping columns added by simFrame -- confirm.
mcd <- CovNAMcd(moon[, paste0("V", 1:5)])
mcd

# 4.15.5 Robust Imputation


library(VIM)
data <- moon
# run irmi() on the data with missing values and summarise what it returns
star <- irmi(moon)
summary(star)
# MCD estimate computed on the irmi() result; evaluating `mcd` prints it
mcd <- CovNAMcd(star)
mcd

You might also like