You are on page 1 of 3

# Session setup: clear the workspace, move to the project directory and
# restore a previously saved workspace image.

# Remove every visible (non-dot-prefixed) object from the current environment.
# NOTE(review): when sourced at top level this wipes the caller's whole
# global workspace, not only objects created by this script.
rm(list = ls())

# Print the working directory before it is changed (informational only).
getwd()
# The relative file names passed to the read calls further down
# ("All_Complete_Data.csv", "test.xlsx") resolve against this directory.
setwd("/Users/z001ypc/Desktop/OSA/OSA_Markov")
# Restore the objects stored in the saved image. Which objects it contains
# cannot be told from this script; anything it defines may be overwritten by
# the assignments below.
load("/Users/z001ypc/Desktop/OSA/OSA_Markov/Markov_Chain.RData")
require("gdata")
require("NetData")
require("igraph")
require("DataCombine")
require("gtools")
require("plyr")
require("BBmisc")
require("csv")
require("reshape2")
require("caret")

### Data Preparation ###

# Read the raw export and strip the table prefix carried by every column
# name.
mydata <- read.csv("All_Complete_Data.csv", header = TRUE, sep = ",")
names(mydata) <- gsub(
  "osa_sample_50_store_hourly_all_data.", "", names(mydata)
)
mydata$sls_d <- as.Date(as.character(mydata$sls_d))

# Store filter. list_of_stores currently holds every store present in the
# data, so all rows are kept; narrow list_of_stores to restrict the run.
list_of_stores <- unique(mydata$co_loc_i)
mydata <- mydata[mydata$co_loc_i %in% list_of_stores, ]

# One data frame per store x item x day-of-week combination. Combinations
# that never occur are kept by split() as zero-row frames and are removed
# further down when their maximum turns out not to be finite.
mydata_new <- split(
  mydata,
  list(mydata$co_loc_i, mydata$dpci_lbl_t, mydata$day_of_week)
)

# Columns removed before reshaping; what remains is sls_d plus the columns
# that get melted into `value`.
# NOTE(review): "dpc_lbl_t" looks like a misspelling of "dpci_lbl_t" (which
# is also listed); it is kept because dropping a missing name is harmless.
list_drop <- c(
  "dpc_lbl_t", "co_loc_i", "mdse_item_i", "mdse_dept_ref_i",
  "mdse_clas_ref_i", "dpci_lbl_t", "mdse_dept_n", "mdse_clas_n",
  "week_of_month", "weekday_of_month", "day_of_month", "day_of_week",
  "class_ti_00_07", "class_ti_8_00", "class_ti_9_00", "class_ti_10_00",
  "class_ti_11_00", "class_ti_12_00", "class_ti_13_00", "class_ti_14_00",
  "class_ti_15_00", "class_ti_16_00", "class_ti_17_00", "class_ti_18_00",
  "class_ti_19_00", "class_ti_20_00", "class_ti_21_00",
  "class_ti_22_00", "class_ti_23_00", "class_ti_24_00", "traffic_ti_00_07",
  "traffic_ti_8_00", "traffic_ti_9_00", "traffic_ti_10_00",
  "traffic_ti_11_00", "traffic_ti_12_00", "traffic_ti_13_00",
  "traffic_ti_14_00", "traffic_ti_15_00", "traffic_ti_16_00",
  "traffic_ti_17_00", "traffic_ti_18_00", "traffic_ti_19_00",
  "traffic_ti_20_00", "traffic_ti_21_00", "traffic_ti_22_00",
  "traffic_ti_23_00", "traffic_ti_24_00"
)

# drop = FALSE keeps a data frame even if only one column survives.
mydata_new <- lapply(
  mydata_new,
  function(x) x[, !colnames(x) %in% list_drop, drop = FALSE]
)

# Long format (sls_d, variable, value), ordered by date, NA replaced by 0.
mydata_new <- lapply(mydata_new, function(x) {
  long <- reshape2::melt(x, id.vars = "sls_d")
  long <- long[order(long$sls_d), ]
  replace(long, is.na(long), 0)
})

# Observed state sequence per combination: integer values, negatives removed.
training_sales <- lapply(mydata_new, function(x) {
  sales <- as.integer(as.vector(x$value))
  sales[sales >= 0]
})

list_name_training_sales <-
  names(training_sales)[names(training_sales) != "character(0)"]
training_sales <- training_sales[list_name_training_sales]

# Largest observed value per series. An empty series is given -Inf
# explicitly (what max() would return, minus the warning); a series that
# still contains NA yields NA. Both are filtered out with is.finite()
# instead of comparing against the strings "-Inf" / "NA".
max_sales <- lapply(
  training_sales,
  function(x) if (length(x) > 0L) max(x) else -Inf
)
has_sales <- vapply(max_sales, function(m) is.finite(m), logical(1))
max_sales <- max_sales[has_sales]
list_with_sales <- names(max_sales)

# State space to sample from: 0, 1, ..., max for each series.
mynumbers <- lapply(max_sales, function(x) 0:x)
training_sales <- training_sales[names(mynumbers)]

train_df_sizes <- lapply(training_sales, length)

### Test data ###

# Read the hold-out sheet and apply the same column-name clean-up as for the
# training data.
mydata_test <- read.xls("test.xlsx", sheet = 1, header = TRUE, as.is = TRUE)
names(mydata_test) <- gsub(
  "osa_sample_50_store_hourly_all_data.", "", names(mydata_test)
)
mydata_test$sls_d <- as.Date(as.character(mydata_test$sls_d))

# One data frame per store x item x day-of-week combination.
test_mydata_new <- split(
  mydata_test,
  list(mydata_test$co_loc_i, mydata_test$dpci_lbl_t, mydata_test$day_of_week)
)

# Relabel each group with the values it actually contains. The replacement
# is a list, which `names<-` coerces with as.character(), so a group's label
# is the deparsed vector and an empty group is labelled "character(0)" --
# the label filtered out at the end of this section.
# NOTE(review): these labels therefore differ in format from the
# "store.item.day" names used for the training series -- confirm that
# downstream code pairs the two by position on purpose.
names(test_mydata_new) <- lapply(
  test_mydata_new,
  function(x) {
    c(unique(x$co_loc_i), unique(x$dpci_lbl_t), unique(x$day_of_week))
  }
)

# drop = FALSE keeps a data frame even if only one column survives.
test_mydata_new <- lapply(
  test_mydata_new,
  function(x) x[, !colnames(x) %in% list_drop, drop = FALSE]
)

# Sort by date, reshape to long format, and sort again. reshape2::melt is
# used (as for the training data) because data.table is not among the
# packages this script loads. The pre-sort is kept so that rows sharing a
# date keep the same relative order as before.
test_mydata_new <- lapply(test_mydata_new, function(x) {
  sorted <- x[order(x$sls_d), , drop = FALSE]
  long <- reshape2::melt(sorted, id.vars = "sls_d")
  long[order(long$sls_d), ]
})

# Rebuild each frame as (sls_d, hour_info, value). cbind() yields a character
# matrix, so all three columns are stored as text; values are converted back
# to integer when test_sales is built.
test_mydata_new <- lapply(test_mydata_new, function(x) {
  as.data.frame(cbind(
    as.character(x$sls_d),
    as.character(x$variable),
    as.integer(x$value)
  ))
})
colname_1 <- c('sls_d', 'hour_info', 'value')
test_mydata_new <- lapply(test_mydata_new, setNames, colname_1)
test_mydata_new <- lapply(
  test_mydata_new,
  function(x) replace(x, is.na(x), 0)
)

# Observed state sequence per combination: integer values, negatives removed.
test_sales <- lapply(test_mydata_new, function(x) {
  sales <- as.integer(as.vector(x$value))
  sales[sales >= 0]
})

# Drop the groups that had no rows (see the relabelling note above).
list_name_test_sales <-
  names(test_sales)[names(test_sales) != "character(0)"]
test_sales <- test_sales[list_name_test_sales]

#' Estimate a first-order Markov transition matrix from a state sequence
#'
#' States are the integers 0, 1, ..., max(vctr). Entry [i, j] of the result
#' is the observed share of transitions out of state i that went to state j.
#' A state that is never left (no observed outgoing transition) gets a
#' uniform row, so every row of the result sums to 1.
#'
#' @param vctr Non-empty vector of non-negative whole numbers, in
#'   observation order.
#' @return A square numeric matrix with max(vctr) + 1 rows and columns whose
#'   row and column names are "0", "1", ..., as.character(max(vctr)).
transMatrixFunc <- function(vctr) {
  if (length(vctr) == 0L || anyNA(vctr) ||
      any(vctr < 0) || any(vctr %% 1 != 0)) {
    stop("`vctr` must be a non-empty vector of non-negative whole numbers",
         call. = FALSE)
  }

  states <- 0:max(vctr)
  n_states <- length(states)

  counts <- matrix(0, nrow = n_states, ncol = n_states,
                   dimnames = list(states, states))

  # A single observation has no transitions. Guarding here avoids iterating
  # over 2:1 (which counts down) and lets every row fall back to uniform.
  if (length(vctr) > 1L) {
    from <- factor(vctr[-length(vctr)], levels = states)
    to <- factor(vctr[-1L], levels = states)
    # table() on the fixed level set returns the full n x n count grid in
    # the same row/column order as `counts`.
    counts[] <- as.vector(table(from, to))
  }

  row_totals <- rowSums(counts)
  observed <- row_totals > 0

  probs <- counts
  # Dividing by a vector of length nrow recycles down the columns, so each
  # row is divided by its own total.
  probs[observed, ] <-
    counts[observed, , drop = FALSE] / row_totals[observed]
  probs[!observed, ] <- 1 / n_states
  probs
}

# Fit a transition matrix. Only the first training series is used here
# (`training_sales[1]`), so the result is a one-element named list.
transition_matrices <- lapply(training_sales[1], transMatrixFunc)

# Last observed value of every training series (0 for an empty series),
# returned as a list named like training_sales.
initial_val <- Map(
  function(series, n_obs) {
    if (n_obs > 0) series[n_obs] else 0
  },
  training_sales,
  train_df_sizes
)

################################################################################
# Prediction using steady state probabilities
################################################################################

## Steady state distribution

# A stationary distribution p of a transition matrix P satisfies p %*% P = p,
# i.e. p is an eigenvector of t(P) for eigenvalue 1.
eigen_values <- lapply(transition_matrices, function(p) eigen(t(p)))

# Take the eigenvector whose eigenvalue is closest to 1 instead of assuming
# it is the first column: eigen() orders by modulus, and eigenvalues such as
# -1 share modulus 1 with it.
eigen_vectors <- lapply(eigen_values, function(e) {
  unit_idx <- which.min(Mod(e$values - 1))
  Re(e$vectors[, unit_idx])
})

# Normalize so the entries sum to 1. Negative entries are set to 0 in place
# rather than removed: removing them would shift every later probability
# onto the wrong state.
steady_probability <- lapply(eigen_vectors, function(x) {
  probs <- as.numeric(x / sum(x))
  probs[is.na(probs) | probs < 0] <- 0
  probs
})

# Label each probability with its state. State sets are looked up by series
# name so the pairing stays correct however many series were fitted, and
# Map() always returns a list (mapply() may simplify to a matrix).
steady_probability <- Map(
  function(probs, states) {
    probs <- probs[seq_along(states)]
    probs[is.na(probs)] <- 0
    setNames(probs, states)
  },
  steady_probability,
  mynumbers[names(steady_probability)]
)


mynumbers <- lapply(mynumbers, function(x) as.vector(as.integer(x)))

# Draw 200 independent states per series from its steady-state distribution.
# All draws for a series are taken in one call instead of growing the
# vectors one element at a time over 200 iterations.
n_draws <- 200L

# Pair each distribution with the state set of the same name, so a
# distribution list covering only some series is not recycled across
# unrelated state sets.
stopifnot(!is.null(names(steady_probability)))
state_sets <- mynumbers[names(steady_probability)]

v2 <- Map(
  function(states, probs) {
    # Sampling positions (not values) keeps a one-element state set from
    # being interpreted as "sample from 1:x".
    picked <- sample.int(
      length(states), size = n_draws, replace = TRUE, prob = probs
    )
    states[picked]
  },
  state_sets,
  steady_probability
)

# Number of observations in each test series.
test_df_size <- lapply(test_sales, function(x) length(x))

# Keep the last n_test simulated draws of each series, one per test
# observation. tail() returns exactly n_test values (the index range
# (200 - n_test):200 returned n_test + 1) and does not fail when a test
# series is longer than the number of draws. Map() always returns a list,
# even when all test series have the same length.
# NOTE(review): simulated series and test series are paired by position,
# not by name -- confirm both lists are in the same order.
a <- Map(
  function(draws, n_test) tail(draws, n_test),
  v2,
  test_df_size
)

# Label the predictions with the test-series names and discard entries that
# have no matching test series (their name is NA).
names(a) <- names(test_df_size)
has_test_series <- !is.na(names(a))
out_put_list <- names(a)[has_test_series]
a <- a[has_test_series]
#unique_states= lapply(a,function(x) length(unique(x)))
#list_store_item= names(unique_states)[unique_states==2]
#a=a[list_store_item]

You might also like