From f281674b7519de3d6b69e040beb65ea52afcf86d Mon Sep 17 00:00:00 2001
From: syneffort
Date: Thu, 24 Nov 2022 18:12:52 +0900
Subject: [PATCH] ensemble

---
 Ensemble/ensemble.r | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 52 insertions(+)
 create mode 100644 Ensemble/ensemble.r

diff --git a/Ensemble/ensemble.r b/Ensemble/ensemble.r
new file mode 100644
index 0000000..dc9f6a4
--- /dev/null
+++ b/Ensemble/ensemble.r
@@ -0,0 +1,52 @@
+# Random forest classification of the iris data set (randomForest package).
+
+# Install randomForest only when it is missing, so re-running the script
+# does not reinstall the package every time.
+if (!requireNamespace("randomForest", quietly = TRUE)) {
+  install.packages("randomForest")
+}
+library(randomForest)
+
+head(iris)
+
+# Fix the RNG seed so the random split and the forest are reproducible.
+set.seed(42)
+
+# For every row of iris draw 1 or 2 with probability 70% / 30%;
+# rows labelled 1 form the training set, rows labelled 2 the test set.
+idx <- sample(2, nrow(iris), replace = TRUE, prob = c(0.7, 0.3))
+
+trainData <- iris[idx == 1, ]
+testData <- iris[idx == 2, ]
+
+# Grow a forest of 100 decision trees.
+model <- randomForest(Species ~ ., data = trainData, ntree = 100, proximity = TRUE)
+model
+
+# Example output from an earlier, unseeded run (numbers depend on the split):
+#
+# Call:
+#  randomForest(formula = Species ~ ., data = trainData, ntree = 100, proximity = T)
+#                Type of random forest: classification
+#                      Number of trees: 100
+# No. of variables tried at each split: 2
+#
+#         OOB estimate of  error rate: 6.36%
+# Confusion matrix:
+#            setosa versicolor virginica class.error
+# setosa         36          0         0  0.00000000
+# versicolor      0         34         3  0.08108108
+# virginica       0          4        33  0.10810811
+
+# OOB = Out Of Bag: each tree is scored on the training rows left out of its
+# bootstrap sample.
+
+# predict() without newdata returns the OOB predictions for the training rows.
+table(trainData$Species, predict(model))
+
+# Confusion table on the held-out test set, which the split above was made for.
+table(testData$Species, predict(model, newdata = testData))
+
+# Variable importance (mean decrease in Gini): the higher the value, the more
+# the variable contributes to separating the classes.
+importance(model)