# You are on page 1 of 6

# Code for K Nearest Neighbors ----
#
# Reads a header-less CSV, splits it 70/30 into training and test sets,
# tunes a KNN classifier on column V1 with repeated cross-validation, and
# reports test-set performance.

# Install caret only when it is missing. The package name must be a quoted
# string; a bare symbol is evaluated as an (undefined) object.
if (!requireNamespace("caret", quietly = TRUE)) {
  install.packages("caret")
}

# Package import statement
library(caret)

# Data import statements
# header = FALSE makes read.csv name the columns V1, V2, ...
Data_df <- read.csv("Bigbasket1", header = FALSE)

str(Data_df) # structure of our data frame

# Convert the outcome V1 to a factor on the full data set, before splitting,
# so that the training and test sets share the same factor levels and the
# partition below is sampled within each class.
Data_df[["V1"]] <- factor(Data_df[["V1"]])

# Data partitioning
set.seed(3033)
intrain <- createDataPartition(y = Data_df$V1, p = 0.7, list = FALSE)
training <- Data_df[intrain, ]
testing <- Data_df[-intrain, ]

# Check dimensions of train & test set
dim(training)
dim(testing)

# Check whether any NA value exists or not
anyNA(Data_df)

summary(Data_df) # summary stats of our data

# Training & Preprocessing
# 10-fold cross-validation, repeated 3 times
trctrl <- trainControl(method = "repeatedcv", number = 10, repeats = 3)

set.seed(3333)
knn_fit <- train(
  V1 ~ .,
  data = training,
  method = "knn",
  trControl = trctrl,
  preProcess = c("center", "scale"),
  tuneLength = 10
)

knn_fit # knn classifier

# Plot accuracy vs K value graph
plot(knn_fit)

# Predict classes for test set using knn classifier
test_pred <- predict(knn_fit, newdata = testing)

test_pred

# Test set statistics
# Both arguments are factors with identical levels (see conversion above).
confusionMatrix(data = test_pred, reference = testing$V1) # check accuracy


# Code for K Nearest Neighbors ----
#
# Reads sheet 2 of the Bigbasket Excel workbook, splits it 70/30 into
# training and test sets, tunes a KNN classifier on the Description column
# with repeated cross-validation, and reports test-set performance.

# Install required packages only when they are missing. Package names must be
# quoted strings; a bare symbol is evaluated as an (undefined) object.
if (!requireNamespace("caret", quietly = TRUE)) {
  install.packages("caret")
}
if (!requireNamespace("readxl", quietly = TRUE)) {
  install.packages("readxl")
}

# Package import statements
library(caret)
library(readxl) # provides read_excel()

# Data import statements
# read_excel() returns a tibble; convert it to a plain data.frame so that row
# indexing with the one-column matrix returned by createDataPartition() below
# behaves like ordinary data.frame subsetting.
Data_df <- as.data.frame(
  read_excel("C:/sem4/New folder (2)/Bigbasket (1).xls", sheet = 2)
)

str(Data_df) # structure of our data frame

# Convert the outcome Description to a factor on the full data set, before
# splitting, so that the training and test sets share the same factor levels
# and the partition below is sampled within each class.
Data_df[["Description"]] <- factor(Data_df[["Description"]])

# Data partitioning
set.seed(62135)
intrain <- createDataPartition(y = Data_df$Description, p = 0.7, list = FALSE)
training <- Data_df[intrain, ]
testing <- Data_df[-intrain, ]

# Check dimensions of train & test set
dim(training)
dim(testing)

# Check whether any NA value exists or not
anyNA(Data_df)

summary(Data_df) # summary stats of our data

# Training & Preprocessing
# 10-fold cross-validation, repeated 3 times
trctrl <- trainControl(method = "repeatedcv", number = 10, repeats = 3)

set.seed(62135)
knn_fit <- train(
  Description ~ .,
  data = training,
  method = "knn",
  trControl = trctrl,
  preProcess = c("center", "scale"),
  tuneLength = 10
)

knn_fit # knn classifier

# Plot accuracy vs K value graph
plot(knn_fit)

# Predict classes for test set using knn classifier
test_pred <- predict(knn_fit, newdata = testing)

test_pred

# Test set statistics
# Both arguments are factors with identical levels (see conversion above).
confusionMatrix(data = test_pred, reference = testing$Description) # check accuracy

# You might also like