Draft Sup Thesis

library(simFrame)
library("mvtnorm")
numberofcases <- 1000 # problem size
numberofvariables <- 8
numberoflatent <- 3
set.seed(123456)

# The simulation code
# Structural model: numberoflatent x numberoflatent matrix, filled row-wise.
effect <- matrix(
  c(
    1, 0, 0.7,
    0, 1, 0.6,
    0, 0, 0.39
  ),
  nrow = numberoflatent, byrow = TRUE
)

# Measurement model: one row per observed variable, one column per latent
# variable, filled column-wise.
model <- matrix(
  c(
    0.9, 0.8, 0.7, 0, 0, 0, 0, 0,
    0, 0, 0.6, 0.8, 0.7, 0, 0, 0,
    0, 0, 0, 0, 0, 0.7, 0.6, 0.5
  ),
  nrow = numberofvariables, ncol = numberoflatent, byrow = FALSE
)
tmodel <- t(model) # transpose of model

model %*% tmodel # show the resulting latent structure
communality <- diag(model %*% tmodel) # find how much to weight true
# scores and errors given the measurement model
uniqueness <- 1 - communality
# A communality above 1 would make sqrt() below return NaN; stop instead.
stopifnot(all(uniqueness >= 0))
errorweight <- sqrt(uniqueness)
errorweight <- diag(errorweight) # how much to weight the errors
# Create true scores for the latent variables: a numberofcases x
# numberoflatent matrix of standard-normal draws (1000 by 3 with the values
# set at the top of the script).
truescores <- matrix(rnorm(numberofcases * numberoflatent), numberofcases)
round(cor(truescores), 2)
# Create true scores that reflect the structural relations.
truescores <- truescores %*% effect
observedscore <- truescores %*% tmodel
# Show the true score correlation matrix (without error).
round(cor(observedscore), 2)
# Create normal error scores, one column per observed variable.
error <- matrix(rnorm(numberofcases * numberofvariables), numberofcases)
error <- error %*% errorweight # matrix 1000 by 8.
observedscore <- observedscore + error # matrix 1000 by 8.
round(cor(observedscore), 2) # show the correlation matrix
# Give the data "realistic" properties: shift and scale each simulated column,
# then round it to the stated number of digits.
GREV <- round(500 + 100 * observedscore[, 1], digits = 0)
GREQ <- round(500 + 100 * observedscore[, 2], digits = 0)
GREA <- round(500 + 100 * observedscore[, 3], digits = 0)
Ach <- round(50 + 10 * observedscore[, 4], digits = 0)
Anx <- round(50 - 10 * observedscore[, 5], digits = 0) # sign is reversed
Prelim <- round(10 + observedscore[, 6], digits = 0)
GPA <- round(4 + 0.5 * observedscore[, 7], digits = 2)
MA <- round(3 + 0.5 * observedscore[, 8], digits = 1)
data <- data.frame(GREV, GREQ, GREA, Ach, Anx, Prelim, GPA, MA)
summary(data) # basic summary statistics
round(cor(data), digits = 2) # show the resulting correlations
# (the original note describes this matrix as identical to the previous one)
pairs(data) # scatterplot matrix of all eight variables
# Contamination control: targets the GREQ column with epsilon = 0.2 and
# multiplies the values handed to `fun` by 100.
cc <- DARContControl(
  target = "GREQ",
  epsilon = 0.2,
  fun = function(x) x * 100
)
bar <- contaminate(data, cc)

bar # print the contaminated data
nc <- NAControl(NArate = 0.3)

# NOTE(review): missing values are inserted into `data`, not into the
# contaminated `bar`, and the result is only printed — confirm this is intended.
setNA(data, nc)
# CONTAMINATION
# library() stops with an error if mvtnorm is missing; require() would only
# return FALSE and let the script fail later at rmvnorm.
library(mvtnorm)
mean <- rep(0, 2)
sigma <- matrix(c(1, 0.5, 0.5, 1), 2, 2)
foo <- generate(
  size = 10, distribution = rmvnorm,
  dots = list(mean = mean, sigma = sigma)
)
# Note: this overwrites the `cc` control object created earlier in the script.
cc <- DARContControl(
  target = "V2",
  epsilon = 0.2, fun = function(x) x * 100
)
contaminate(foo, cc)
# MISSINGNESS MECHANISM
# Load the eusilcP example data: the id / age / eqIncome columns used below
# belong to it, not to the simulated `data` frame built earlier.
data(eusilcP)
eusilcP$age[eusilcP$age < 0] <- 0 # this actually occurs
sam <- draw(eusilcP[, c("id", "age", "eqIncome")], size = 20)

## using control objects

# missing completely at random
mcarc <- NAControl(target = "eqIncome", NArate = 0.2)
setNA(sam, mcarc)
# missing at random
marc <- NAControl(target = "eqIncome", NArate = 0.2, aux = "age")
setNA(sam, marc)
# missing not at random
mnarc <- NAControl(
  target = "eqIncome",
  NArate = 0.2, aux = "eqIncome"
)
setNA(sam, mnarc)

## supply slots of control object as arguments

# missing completely at random
setNA(sam, target = "eqIncome", NArate = 0.2)
# missing at random
setNA(sam, target = "eqIncome", NArate = 0.2, aux = "age")
# missing not at random
setNA(sam, target = "eqIncome", NArate = 0.2, aux = "eqIncome")
Method: Minimum Covariance Determinant Estimator for incomplete data.
R> library("rrcovNA")
R> data("bush10")
R> ## Compute MCD estimates for the modified bushfire data set
R> ## - show() and summary() examples
R> mcd <- CovNAMcd(bush10)
R> mcd
Call:
CovNAMcd(x = bus
4.15.1 Setup for the R-code

numberoflatent<-3
4.15.2 The simulation code
0,1,.6,
#measurement model
model<-matrix(c(.9,.8,.7,0,0,0,0,0,
0,0,.6,.8,.7,0,0,0,
0,0,0,0,0,.7,.6,.5),nrow=numberofvariables,ncol=numberoflatent,byrow=FALSE)

model%*%tmodel #show the resulting latent structure
uniqueness<-1-communality
relations
(without error)
normal error scores
V1<-round(observedscore[,1]*100+500,0)
V5<-round(-observedscore[,5]*10+50,0)
V6<-round(observedscore[,6]+10,0)
V7<-round(observedscore[,7]*.5+4,2)
simdata<-data.frame(V1,V2,V3,V4,V5,V6,V7,V8)
summary(simdata) #basic summary statistics
round(cor(simdata),2) #show the resulting correlations
#previous one
4.15.3 Adding Contamination

Having simulated the multivariate data according to the simulation design, the
next step is to contaminate the data sets to varying degrees, as the design
specifies.
data=simdata
# NOTE(review): the DARContControl() call below is never closed. An argument
# line (presumably the `fun = ...` contamination function) appears to be
# missing, so this section does not parse as written. TODO restore it.
cc <- DARContControl(target = "V1", epsilon = 0.5,
contdata <- contaminate(simdata, cc)
contdata
4.15.4 Inserting Missing Values
From the simulated complete data set, which has been contaminated, a percentage
of the values was set to missing under each of three missingness mechanisms:
missing completely at random, missing at random, and missing not at random.
set.seed(12345)
data <- contdata

sam <- draw(contdata, size = 1000)
# missing completely at random (no auxiliary variable)
mcarc <- NAControl(target = "V1", NArate = 0.2)
setNA(sam, mcarc)
# missing at random (auxiliary variable differs from the target)
marc <- NAControl(target = "V1", NArate = 0.2, aux = "V5")
setNA(sam, marc)

# missing not at random (the target is its own auxiliary variable)
mnarc <- NAControl(target = "V1", NArate = 0.2, aux = "V1")
moon <- setNA(sam, mnarc)
# 4.15.5 Robust Imputation

# irmi() is presumably VIM's: the script loads VIM before a later irmi() call,
# so load it here as well to make the call resolvable.
library(VIM)
stardata <- data.frame(moon[, 1:8])
pool <- irmi(stardata)
# 4.15.6 Robust Estimation of Location and Scale
library("rrcovNA")
data <- pool
## Compute MCD estimates for the simulated data set
## - show() and summary() examples
# NOTE(review): the estimate is computed on `stardata` (the data before
# irmi()), not on the imputed `pool` — confirm this is intended.
mcd <- CovNAMcd(stardata)
mcd

numberoflatent<-3
#set.seed(123456) #structural model
#4.15.2 The simulation code
0,1,.6,
#measurement model
model<-matrix(c(1.332,0.443,1.46,0.175,0.015,0.014,1.328,
1.771,0.586,0.190,0.059,0.015,1.317,0.048,1.903,0.618,0.190,0.48),
nrow=numberofvariables,ncol=numberoflatent,byrow=FALSE)

solve(model%*%tmodel) #show the resulting latent structure
uniqueness<-communality
relations
(without error)
normal error scores
(observedscore)<-observedscore+error #matrix 1000 by 6.
V5<-round(observedscore[,5]*5 +10,0)
simdata<-data.frame(V1,V2,V3,V4,V5,V6)
#previous one
pairs(simdata)

data=simdata
contdata
mechanism.
set.seed(12345)
data=contdata)

setNA(sam, mcarc)
# missing at random
setNA(sam, marc)

4.15.5 Robust Imputation

stardata=data.frame(moon[,1:8])
pool=irmi(stardata)
4.15.6 Robust Estimation of Location and Scale
library("rrcovNA")
data=pool
mcd

numberoflatent<-3
#set.seed(123456) #structural model
0,1,.6,
#measurement model
model<-matrix(c(1.332,0.443,1.46,0.175,0.015,0.014,1.328,
1.771,0.586,0.190,0.059,0.015,1.317,0.048,1.903,0.618,0.190,0.48),
nrow=numberofvariables,ncol=numberoflatent,byrow=FALSE)

solve(model%*%tmodel) #show the resulting latent structure
relations
(without error)
normal error scores
(observedscore)<-observedscore+error #matrix 1000 by 6.
V5<-round(observedscore[,5]*5 +10,0)
simdata<-data.frame(V1,V2,V3,V4,V5,V6)
#previous one
pairs(simdata)

data=simdata
contdata
mechanism.
set.seed(12345)
data=contdata)

setNA(sam, mcarc)
# missing at random
setNA(sam, marc)


numberoflatent<-3
0,1,.6,
#measurement model
model<-
matrix(c(5,1,0,5,1,4,2,3,4,2,9,1,1,9,1),nrow=numberofvariables,ncol=numberoflatent,
byrow=FALSE)

(model%*%tmodel) #show the resulting latent structure
relations
(without error)
normal error scores
error<-error%*%errorweight #matrix 1000 by 8. .
V6<-round(observedscore[,6]+10,0)
data<-data.frame(V1,V2,V3,V4,V5)
summary(data) #basic summary statistics
round(cor(data),2) #show the resulting correlations
#previous one
pairs(data)
#4.15.3 Adding Contamination
#Having successfully simulated the multivariate data according to the simulation
library(mvtnorm)
set.seed(12345)
data=data
bar <- contaminate(data, cc)
bar
plot(V1)
#4.15.4 Inserting Missing Values
#From the simulated complete data set which has been contaminated, some percentages
mechanism.
#missing completely at random mechanism
nc <- NAControl(NArate = 0.3)

moon=setNA(bar, nc)
library("rrcovNA")
data=pool
mcd
# 4.15.5 Robust Imputation

library(VIM)
# Keep `moon` under the name `data`, impute it with irmi(), summarise the
# result, then compute and print the CovNAMcd() estimate on the imputed data.
data <- moon
star <- irmi(moon)
summary(star)
mcd <- CovNAMcd(star)
mcd

Draft Sup Thesis

Uploaded by

Document Information

Copyright

Available Formats

Share this document

Share or Embed Document

Sharing Options

Did you find this document useful?

Is this content inappropriate?

Copyright:

Available Formats

Draft Sup Thesis

Uploaded by

Copyright:

Available Formats

library(simFrame)

The simulation code

tmodel<-t(model) #transpose of model

communality<-diag(model%*%tmodel) #find how much to weight true

#scores and errors given the measurement model

bar <- contaminate(data, cc)

nc <- NAControl(NArate = 0.3)

## using control objects

# missing not at random

## supply slots of control object as arguments

# missing not at random

Method: Minimum Covariance Determinant Estimator for incomplete data.

4.15.1 Setup for the R-code

numberofcases<-1000 #problem size

4.15.2 The simulation code

tmodel<-t(model) #transpose of model

communality<-diag(model%*%tmodel) #find how much to weight true

#scores and errors given the measurement model

4.15.3 Adding Contamination

4.15.4 Inserting Missing Values

## using control objects

# missing not at random

4.15.5 Robust Imputation

4.15.6 Robust Estimation of Location and Scale

Method: Minimum Covariance Determinant Estimator for incomplete data.

4.15.1 Setup for the R-code

numberofcases<-500 #problem size

#4.15.2 The simulation code

tmodel<-t(model) #transpose of model

communality<-diag(model%*%tmodel) #find how much to weight true

#scores and errors given the measurement model

4.15.3 Adding Contamination

4.15.4 Inserting Missing Values

## using control objects

# missing not at random

4.15.5 Robust Imputation

4.15.6 Robust Estimation of Location and Scale

Method: Minimum Covariance Determinant Estimator for incomplete data.

4.15.1 Setup for the R-code

numberofcases<-500 #problem size

#4.15.2 The simulation code

tmodel<-t(model) #transpose of model

communality<-diag(model%*%tmodel) #find how much to weight true

#scores and errors given the measurement model

4.15.3 Adding Contamination

4.15.4 Inserting Missing Values

## using control objects

# missing not at random

numberofcases<-1000 #problem size

#4.15.2 The simulation code

tmodel<-t(model) #transpose of model

communality<-diag(model%*%tmodel) #find how much to weight true

#scores and errors given the measurement model

#4.15.4 Inserting Missing Values

nc <- NAControl(NArate = 0.3)

# 4.15.5 Robust Imputation

You might also like