# Random forest classification of the built-in iris data set.
#
# Steps: load the randomForest package (installing it only when missing),
# randomly split iris into training / test subsets, fit a 100-tree forest
# on the training subset, then inspect OOB performance and variable
# importance.

# Install the package only when it is not available yet, so re-running the
# script does not trigger a fresh download every time.
if (!requireNamespace("randomForest", quietly = TRUE)) {
  install.packages("randomForest")
}
library(randomForest)

head(iris)

# Draw one random label (1 or 2) per row of iris, with probabilities
# 70% : 30%. Rows labelled 1 form the training set and rows labelled 2 the
# test set. The split is random, so results differ between runs unless a
# seed is set beforehand.
idx <- sample(2, nrow(iris), replace = TRUE, prob = c(0.7, 0.3))
trainData <- iris[idx == 1, ]
testData <- iris[idx == 2, ]

# Build a forest of 100 decision trees; proximity = TRUE additionally
# requests the proximity measure among the training rows.
model <- randomForest(
  Species ~ .,
  data = trainData,
  ntree = 100,
  proximity = TRUE
)
model
# Example output (exact numbers vary with the random split):
#
# Call:
#  randomForest(formula = Species ~ ., data = trainData, ntree = 100,
#               proximity = TRUE)
#                Type of random forest: classification
#                      Number of trees: 100
# No. of variables tried at each split: 2
#
#         OOB estimate of  error rate: 6.36%
# Confusion matrix:
#            setosa versicolor virginica class.error
# setosa         36          0         0  0.00000000
# versicolor      0         34         3  0.08108108
# virginica       0          4        33  0.10810811
#
# OOB: Out Of Bag

# Cross-tabulate the true species against the model's predictions for the
# training rows (predict() without newdata returns the OOB predictions).
table(trainData$Species, predict(model))

# Gini-based importance: the variable with the highest value has the
# largest influence on separating the classes.
importance(model)