renaming
This commit is contained in:
@ -0,0 +1,9 @@
|
||||
{
|
||||
"sortOrder": [
|
||||
{
|
||||
"columnIndex": 2,
|
||||
"ascending": true
|
||||
}
|
||||
],
|
||||
"path": "~/git/public/lab/ds/25-1/r"
|
||||
}
|
||||
@ -0,0 +1,7 @@
|
||||
{
|
||||
"installOptions": {
|
||||
"installFromRepository": true,
|
||||
"libraryPath": "/home/sek1ro/R/x86_64-pc-linux-gnu-library/4.5",
|
||||
"installDependencies": true
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,3 @@
|
||||
{
|
||||
"activeTab": 0
|
||||
}
|
||||
@ -0,0 +1,14 @@
|
||||
{
|
||||
"left": {
|
||||
"splitterpos": 453,
|
||||
"topwindowstate": "NORMAL",
|
||||
"panelheight": 1097,
|
||||
"windowheight": 1135
|
||||
},
|
||||
"right": {
|
||||
"splitterpos": 680,
|
||||
"topwindowstate": "NORMAL",
|
||||
"panelheight": 1097,
|
||||
"windowheight": 1135
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,5 @@
|
||||
{
|
||||
"TabSet1": 0,
|
||||
"TabSet2": 0,
|
||||
"TabZoom": {}
|
||||
}
|
||||
5
5/data science/r/.Rproj.user/C6239C96/rmd-outputs
Normal file
5
5/data science/r/.Rproj.user/C6239C96/rmd-outputs
Normal file
@ -0,0 +1,5 @@
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@ -0,0 +1 @@
|
||||
{"active_set":"","sets":[]}
|
||||
26
5/data science/r/.Rproj.user/C6239C96/sources/per/t/36F8AE4B
Normal file
26
5/data science/r/.Rproj.user/C6239C96/sources/per/t/36F8AE4B
Normal file
@ -0,0 +1,26 @@
|
||||
{
|
||||
"id": "36F8AE4B",
|
||||
"path": "~/git/public/lab/ds/25-1/r/9.Rmd",
|
||||
"project_path": "9.Rmd",
|
||||
"type": "r_markdown",
|
||||
"hash": "1911220946",
|
||||
"contents": "",
|
||||
"dirty": false,
|
||||
"created": 1769447921680.0,
|
||||
"source_on_save": false,
|
||||
"relative_order": 1,
|
||||
"properties": {
|
||||
"source_window_id": "",
|
||||
"Source": "Source",
|
||||
"cursorPosition": "153,3",
|
||||
"scrollLine": "154"
|
||||
},
|
||||
"folds": "",
|
||||
"lastKnownWriteTime": 1769450242,
|
||||
"encoding": "UTF-8",
|
||||
"collab_server": "",
|
||||
"source_window": "",
|
||||
"last_content_update": 1769450242314,
|
||||
"read_only": false,
|
||||
"read_only_alternatives": []
|
||||
}
|
||||
@ -0,0 +1,161 @@
|
||||
---
|
||||
title: "Lab9: Decision trees"
|
||||
author: "Vladislav Litvinov <vlad@sek1ro>"
|
||||
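output:
  pdf_document:
    # toc_float is an html_document-only option; pdf_document accepts toc.
    # NOTE(review): the body contains Cyrillic text; the default pdflatex
    # engine may not render it. Confirm whether latex_engine needs changing.
    toc: true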
|
||||
---
|
||||
# Data preparation
|
||||
```{r}
|
||||
# Load the survey data and split it into training and test sets.
# The path is relative to this document's directory, which is where knitr
# evaluates chunks by default; this avoids a machine-specific setwd() call.
survey <- read.csv("survey.csv")

# The split is positional: rows 1-600 train the model, rows 601-750 are held
# out for testing. Fail early if the file is shorter than that, because
# out-of-range row indices would silently produce all-NA rows.
stopifnot(nrow(survey) >= 750)

train_df <- survey[1:600, ]
test_df <- survey[601:750, ]
|
||||
```
|
||||
# Building classification tree
|
||||
decision formula is MYDEPV ~ Price + Income + Age
|
||||
|
||||
Use three-fold cross-validation and the information gain splitting index
|
||||
Which features were actually used to construct the tree?
|
||||
Plot the tree using the “rpart.plot” package.
|
||||
|
||||
Three-fold cross-validation - данные делят на три равные части A, B и C и делают 3 прогона:
|
||||
Прогон 1: обучаемся на B + C, тестируем на A
|
||||
Прогон 2: обучаемся на A + C, тестируем на B
|
||||
Прогон 3: обучаемся на A + B, тестируем на C
|
||||
|
||||
Получаем 3 значения метрики (accuracy, F1, MSE и т.п.).
|
||||
Берём среднее значение — это и есть итоговая оценка качества модели.
|
||||
|
||||
rpart сам отбрасывает признаки, если они не улучшают разбиение по information gain.
|
||||
|
||||
CP-table - связь сложности дерева и ошибки
|
||||
Root node error — ошибка без разбиений
|
||||
nsplit — число split-ов
|
||||
rel error — обучающая ошибка относительно корня
|
||||
xerror — ошибка по cross-validation
|
||||
xstd — стандартное отклонение xerror
|
||||
|
||||
type — расположение split-ов
|
||||
extra — доп. информация в узлах
|
||||
fallen.leaves — выравнивание листьев
|
||||
|
||||
$$H = -x \cdot \log(x) - (1 - x) \log(1 - x)$$
|
||||
Gain(A) = Info(S) - Info(S_A) - максимизируем
|
||||
|
||||
Ранняя остановка. Ограничение глубины. Минимальное количество примеров в узле.
|
||||
|
||||
Отсечение ветвей.
|
||||
Строительство полного дерева, в котором листья содержат примеры одного класса.
|
||||
Определение двух показателей: относительной точности модели и абсолютной ошибки.
|
||||
Удаление листьев и узлов, потеря которых минимально скажется на точности модели и увеличении ошибки.
|
||||
|
||||
|
||||
```{r}
|
||||
library(rpart)
library(rpart.plot)

# rpart assigns observations to cross-validation folds at random, so fix the
# seed to make the xerror/xstd columns of the CP table reproducible.
set.seed(42)

# Classification tree for MYDEPV, split by information gain and evaluated
# with three-fold cross-validation.
tree <- rpart(
  MYDEPV ~ Price + Income + Age,
  data = train_df,
  method = "class",
  parms = list(split = "information"),
  control = rpart.control(xval = 3)
)
printcp(tree)

# extra = 106 combines two display options:
#   6   - class models: the probability of the second class only
#         (useful for binary responses);
#   100 - also display the percentage of observations in the node.
rpart.plot(
  tree,
  type = 1,
  extra = 106,
  fallen.leaves = TRUE
)
|
||||
```
|
||||
Score the model with the training data and create the model’s confusion matrix. Which class of MYDEPV was the model better able to classify?
|
||||
```{r}
|
||||
# Predicted class for every training row.
pred_class <- predict(tree, newdata = train_df, type = "class")

# Confusion matrix: rows are the actual classes, columns the predicted ones.
conf_mat <- table(Actual = train_df$MYDEPV, Predicted = pred_class)
print(conf_mat)

# Share of each actual class that was classified correctly: the diagonal of
# the row-normalised confusion matrix.
print(diag(prop.table(conf_mat, margin = 1)))
|
||||
```
|
||||
Define the resubstitution error rate, and then calculate it using the confusion matrix from the previous step. Is it a good indicator of predictive performance? Why or why not?
|
||||
|
||||
Resubstitution error rate — это доля неправильных предсказаний на тех же данных, на которых обучалась модель
|
||||
```{r}
|
||||
# Resubstitution error: one minus the share of training observations that
# lie on the diagonal of the confusion matrix.
n_correct <- sum(diag(conf_mat))
n_total <- sum(conf_mat)
print(1 - n_correct / n_total)
|
||||
```
|
||||
ROC curve - Receiver Operating Characteristic
|
||||
x - FPR = FP / (FP + TN)
|
||||
y - TPR = TP / (TP + FN)
|
||||
```{r}
|
||||
library(ROCR)

# Predicted probability of the second class for every training row.
class_probs <- predict(tree, train_df, type = "prob")
pred_prob <- class_probs[, 2]

# ROC curve: true positive rate against false positive rate.
pred <- prediction(pred_prob, train_df$MYDEPV)
perf <- performance(pred, "tpr", "fpr")

plot(perf)
# Diagonal reference line (intercept 0, slope 1).
abline(a = 0, b = 1)

# Area under the ROC curve.
auc_perf <- performance(pred, measure = "auc")
slot(auc_perf, "y.values")[[1]]
|
||||
```
|
||||
Score the model with the testing data. How accurate are the tree’s predictions?
|
||||
Repeat part (a), but set the splitting index to the Gini coefficient splitting index. How does the new tree compare to the previous one?
|
||||
|
||||
индекс Джини показывает, как часто случайно выбранный пример обучающего множества будет распознан неправильно.
|
||||
|
||||
Gini(Q) = 1 - sum(p^2) - минимизируем (выбираем разбиение с наименьшим взвешенным индексом Джини в дочерних узлах)
|
||||
0 - все примеры принадлежат одному классу
|
||||
1 - 1/k - все k классов равновероятны (для двух классов максимум равен 0.5)
|
||||
$$Gini(x) = 1 - x^{2} - (1 - x)^{2}$$
|
||||
```{r}
|
||||
# Score the information-gain tree on the held-out test rows.
pred_test <- predict(tree, test_df, type = "class")
conf_mat_test <- table(Actual = test_df$MYDEPV, Predicted = pred_test)
conf_mat_test

# Share of each actual class that was classified correctly.
print(diag(conf_mat_test) / rowSums(conf_mat_test))
# Overall accuracy on the test set.
print(sum(diag(conf_mat_test)) / sum(conf_mat_test))

# Same model as the information-gain tree, but split by the Gini index.
# Three-fold cross-validation is kept so the two CP tables are comparable.
# The seed is fixed because cross-validation folds are assigned at random
# and xerror is later used to choose the pruning cp.
set.seed(42)
tree_gini <- rpart(
  MYDEPV ~ Price + Income + Age,
  data = train_df,
  method = "class",
  parms = list(split = "gini"),
  control = rpart.control(xval = 3)
)

printcp(tree_gini)

rpart.plot(
  tree_gini,
  type = 1,
  extra = 106,
  fallen.leaves = TRUE
)
|
||||
```
|
||||
One way to prune a tree is according to the complexity parameter associated with the smallest cross-validation error. Prune the new tree in this way using the “prune” function. Which features were actually used in the pruned tree? Why were certain variables not used?
|
||||
```{r}
|
||||
# Pick the complexity parameter from the CP-table row with the smallest
# cross-validation error.
cp_table <- tree_gini$cptable
min_xerror_row <- which.min(cp_table[, "xerror"])
best_cp <- cp_table[min_xerror_row, "CP"]
print(best_cp)

# Prune the Gini tree at that cp, then show its CP table and plot.
pruned_tree <- prune(tree_gini, cp = best_cp)

printcp(pruned_tree)

rpart.plot(pruned_tree)
|
||||
```
|
||||
Create the confusion matrix for the new model, and compare the performance of the model before and after pruning.
|
||||
```{r}
|
||||
# Predicted class of the pruned tree for every test row.
pruned_pred <- predict(pruned_tree, newdata = test_df, type = "class")

# Confusion matrix: rows are the actual classes, columns the predicted ones.
pruned_conf_mat <- table(Actual = test_df$MYDEPV, Predicted = pruned_pred)
print(pruned_conf_mat)

# Share of each actual class that was classified correctly.
class_totals <- rowSums(pruned_conf_mat)
print(diag(pruned_conf_mat) / class_totals)
|
||||
```
|
||||
@ -0,0 +1,6 @@
|
||||
{
|
||||
"source_window_id": "",
|
||||
"Source": "Source",
|
||||
"cursorPosition": "153,3",
|
||||
"scrollLine": "154"
|
||||
}
|
||||
@ -0,0 +1,7 @@
|
||||
{
|
||||
"tempName": "Untitled1",
|
||||
"source_window_id": "",
|
||||
"Source": "Source",
|
||||
"cursorPosition": "28,0",
|
||||
"scrollLine": "17"
|
||||
}
|
||||
2
5/data science/r/.Rproj.user/C6239C96/sources/prop/INDEX
Normal file
2
5/data science/r/.Rproj.user/C6239C96/sources/prop/INDEX
Normal file
@ -0,0 +1,2 @@
|
||||
~%2Fgit%2Fpublic%2Flab%2Fds%2F25-1%2Fr%2F9.Rmd="231EDFBF"
|
||||
~%2Fgit%2Fpublic%2Flab%2Fds%2F25-1%2Fr2%2F3.R="D80D5B6A"
|
||||
@ -0,0 +1 @@
|
||||
{"chunk_definitions":[{"row":14,"row_count":1,"visible":true,"expansion_state":0,"options":{"engine":"r","label":"unnamed-chunk-3","dev":"png"},"document_id":"36F8AE4B","chunk_id":"cw3y8fjmo2ayt","chunk_label":"unnamed-chunk-1"},{"row":77,"row_count":1,"visible":true,"expansion_state":0,"options":{"engine":"r","label":"unnamed-chunk-4","dev":"png"},"document_id":"36F8AE4B","chunk_id":"cgb1v2g83kknt","chunk_label":"unnamed-chunk-2"},{"row":89,"row_count":1,"visible":true,"expansion_state":0,"options":{"engine":"r","label":"unnamed-chunk-5","dev":"png"},"document_id":"36F8AE4B","chunk_id":"c3jleyvkqxnqm","chunk_label":"unnamed-chunk-3"},{"row":95,"row_count":1,"visible":true,"expansion_state":0,"options":{"engine":"r","label":"unnamed-chunk-6","dev":"png"},"document_id":"36F8AE4B","chunk_id":"c60fx7tj15bk5","chunk_label":"unnamed-chunk-4"},{"row":111,"row_count":1,"visible":true,"expansion_state":0,"options":{"engine":"r","label":"unnamed-chunk-7","dev":"png"},"document_id":"36F8AE4B","chunk_id":"csdwusaa8puvd","chunk_label":"unnamed-chunk-5"},{"row":142,"row_count":1,"visible":true,"expansion_state":0,"options":{"engine":"r","label":"unnamed-chunk-18","dev":"png"},"document_id":"36F8AE4B","chunk_id":"cr3h7jd3nr0ya","chunk_label":"unnamed-chunk-6"},{"row":153,"row_count":1,"visible":true,"expansion_state":0,"options":{"engine":"r","label":"unnamed-chunk-19","dev":"png"},"document_id":"36F8AE4B","chunk_id":"cpyo5ihaht7o1","chunk_label":"unnamed-chunk-7"},{"row":160,"row_count":1,"visible":true,"expansion_state":0,"options":{"engine":"r","label":"unnamed-chunk-20","dev":"png"},"document_id":"36F8AE4B","chunk_id":"cce5y7xzr9zk6","chunk_label":"unnamed-chunk-8"}],"doc_write_time":1769443515}
|
||||
@ -0,0 +1,32 @@
|
||||
"0","pred_class = predict(tree, train_df, type=""class"")"
|
||||
"0",""
|
||||
"0","conf_mat = table("
|
||||
"0"," Actual = train_df$MYDEPV,"
|
||||
"0"," Predicted = pred_class"
|
||||
"0",")"
|
||||
"0",""
|
||||
"0","conf_mat"
|
||||
"1"," Predicted
|
||||
"
|
||||
"1","Actual"
|
||||
"1"," 0"
|
||||
"1"," 1"
|
||||
"1","
|
||||
0"
|
||||
"1"," 314"
|
||||
"1"," 26"
|
||||
"1","
|
||||
1"
|
||||
"1"," 19"
|
||||
"1"," 241"
|
||||
"1","
|
||||
"
|
||||
"0","print(diag(conf_mat) / rowSums(conf_mat))"
|
||||
"1"," 0 "
|
||||
"1"," 1 "
|
||||
"1","
|
||||
"
|
||||
"1","0.9235294 "
|
||||
"1","0.9269231 "
|
||||
"1","
|
||||
"
|
||||
|
@ -0,0 +1,5 @@
|
||||
"0","print(1 - sum(diag(conf_mat)) / sum(conf_mat))"
|
||||
"1","[1]"
|
||||
"1"," 0.075"
|
||||
"1","
|
||||
"
|
||||
|
@ -0,0 +1,27 @@
|
||||
"0","pruned_pred = predict(pruned_tree, test_df, type=""class"")"
|
||||
"0","pruned_conf_mat = table(Actual = test_df$MYDEPV, Predicted = pruned_pred)"
|
||||
"0","pruned_conf_mat"
|
||||
"1"," Predicted
|
||||
"
|
||||
"1","Actual"
|
||||
"1"," 0"
|
||||
"1"," 1"
|
||||
"1","
|
||||
0"
|
||||
"1"," 82"
|
||||
"1"," 4"
|
||||
"1","
|
||||
1"
|
||||
"1"," 13"
|
||||
"1"," 51"
|
||||
"1","
|
||||
"
|
||||
"0","print(diag(pruned_conf_mat) / rowSums(pruned_conf_mat))"
|
||||
"1"," 0 "
|
||||
"1"," 1 "
|
||||
"1","
|
||||
"
|
||||
"1","0.9534884 "
|
||||
"1","0.7968750 "
|
||||
"1","
|
||||
"
|
||||
|
@ -0,0 +1,107 @@
|
||||
"0","library(rpart)"
|
||||
"0","tree = rpart("
|
||||
"0"," MYDEPV ~ Price + Income + Age,"
|
||||
"0"," data = train_df,"
|
||||
"0"," method = ""class"","
|
||||
"0"," parms = list(split = ""information""),"
|
||||
"0"," control = rpart.control("
|
||||
"0"," xval = 3,"
|
||||
"0"," ),"
|
||||
"0",")"
|
||||
"0","printcp(tree)"
|
||||
"1","
|
||||
Classification tree:
|
||||
"
|
||||
"1","rpart(formula = MYDEPV ~ Price + Income + Age, data = train_df,
|
||||
"
|
||||
"1"," method = ""class"", parms = list(split = ""information""), control = rpart.control(xval = 3,
|
||||
"
|
||||
"1"," ))
|
||||
"
|
||||
"1","
|
||||
"
|
||||
"1","Variables actually used in tree construction:
|
||||
"
|
||||
"1","[1]"
|
||||
"1"," Age "
|
||||
"1"," Income"
|
||||
"1"," Price "
|
||||
"1","
|
||||
"
|
||||
"1","
|
||||
"
|
||||
"1","Root node error: "
|
||||
"1",""
|
||||
"1","260"
|
||||
"1",""
|
||||
"1","/"
|
||||
"1",""
|
||||
"1","600"
|
||||
"1",""
|
||||
"1"," = "
|
||||
"1",""
|
||||
"1","0.43333"
|
||||
"1",""
|
||||
"1","
|
||||
|
||||
"
|
||||
"1","n="
|
||||
"1"," "
|
||||
"1","600"
|
||||
"1"," "
|
||||
"1","
|
||||
|
||||
"
|
||||
"1"," "
|
||||
"1"," CP"
|
||||
"1"," nsplit"
|
||||
"1"," rel error"
|
||||
"1"," xerror"
|
||||
"1"," xstd"
|
||||
"1","
|
||||
1"
|
||||
"1"," 0.692308"
|
||||
"1"," 0"
|
||||
"1"," 1.00000"
|
||||
"1"," 1.00000"
|
||||
"1"," 0.046685"
|
||||
"1","
|
||||
2"
|
||||
"1"," 0.025000"
|
||||
"1"," 1"
|
||||
"1"," 0.30769"
|
||||
"1"," 0.31154"
|
||||
"1"," 0.032194"
|
||||
"1","
|
||||
3"
|
||||
"1"," 0.011538"
|
||||
"1"," 3"
|
||||
"1"," 0.25769"
|
||||
"1"," 0.27308"
|
||||
"1"," 0.030430"
|
||||
"1","
|
||||
4"
|
||||
"1"," 0.010256"
|
||||
"1"," 5"
|
||||
"1"," 0.23462"
|
||||
"1"," 0.26923"
|
||||
"1"," 0.030244"
|
||||
"1","
|
||||
5"
|
||||
"1"," 0.010000"
|
||||
"1"," 11"
|
||||
"1"," 0.17308"
|
||||
"1"," 0.26923"
|
||||
"1"," 0.030244"
|
||||
"1","
|
||||
"
|
||||
"0","library(rpart.plot)"
|
||||
"0",""
|
||||
"0","rpart.plot("
|
||||
"0"," tree,"
|
||||
"0"," type = 1,"
|
||||
"0"," extra = 106,"
|
||||
"0"," #6 Class models: the probability of the second class only. Useful for binary responses."
|
||||
"0"," #100 display the percentage of observations in the node. "
|
||||
"0"," fallen.leaves = TRUE,"
|
||||
"0",")"
|
||||
|
@ -0,0 +1 @@
|
||||
{"height":432.6328800988875,"width":700.0,"dpi":-1.0,"size_behavior":0,"conditions":[]}
|
||||
Binary file not shown.
|
After Width: | Height: | Size: 78 KiB |
Binary file not shown.
@ -0,0 +1 @@
|
||||
{"chunk_definitions":[{"row":14,"row_count":1,"visible":true,"expansion_state":0,"options":{"engine":"r","label":"unnamed-chunk-3","dev":"png"},"document_id":"36F8AE4B","chunk_id":"cw3y8fjmo2ayt","chunk_label":"unnamed-chunk-1"},{"row":77,"row_count":1,"visible":true,"expansion_state":0,"options":{"engine":"r","label":"unnamed-chunk-4","dev":"png"},"document_id":"36F8AE4B","chunk_id":"cgb1v2g83kknt","chunk_label":"unnamed-chunk-2"},{"row":89,"row_count":1,"visible":true,"expansion_state":0,"options":{"engine":"r","label":"unnamed-chunk-5","dev":"png"},"document_id":"36F8AE4B","chunk_id":"c3jleyvkqxnqm","chunk_label":"unnamed-chunk-3"},{"row":95,"row_count":1,"visible":true,"expansion_state":0,"options":{"engine":"r","label":"unnamed-chunk-6","dev":"png"},"document_id":"36F8AE4B","chunk_id":"c60fx7tj15bk5","chunk_label":"unnamed-chunk-4"},{"row":111,"row_count":1,"visible":true,"expansion_state":0,"options":{"engine":"r","label":"unnamed-chunk-7","dev":"png"},"document_id":"36F8AE4B","chunk_id":"csdwusaa8puvd","chunk_label":"unnamed-chunk-5"},{"row":142,"row_count":1,"visible":true,"expansion_state":0,"options":{"engine":"r","label":"unnamed-chunk-18","dev":"png"},"document_id":"36F8AE4B","chunk_id":"cr3h7jd3nr0ya","chunk_label":"unnamed-chunk-6"},{"row":153,"row_count":1,"visible":true,"expansion_state":0,"options":{"engine":"r","label":"unnamed-chunk-19","dev":"png"},"document_id":"36F8AE4B","chunk_id":"cpyo5ihaht7o1","chunk_label":"unnamed-chunk-7"},{"row":160,"row_count":1,"visible":true,"expansion_state":0,"options":{"engine":"r","label":"unnamed-chunk-20","dev":"png"},"document_id":"36F8AE4B","chunk_id":"cce5y7xzr9zk6","chunk_label":"unnamed-chunk-8"}],"doc_write_time":1769443515}
|
||||
@ -0,0 +1,79 @@
|
||||
"0","best_cp <- tree_gini$cptable[which.min(tree_gini$cptable[, ""xerror""]), ""CP""]"
|
||||
"0","best_cp"
|
||||
"1","[1]"
|
||||
"1"," 0.01153846"
|
||||
"1","
|
||||
"
|
||||
"0","pruned_tree = prune(tree_gini, cp = best_cp)"
|
||||
"0",""
|
||||
"0","printcp(pruned_tree)"
|
||||
"1","
|
||||
Classification tree:
|
||||
"
|
||||
"1","rpart(formula = MYDEPV ~ Price + Income + Age, data = train_df,
|
||||
"
|
||||
"1"," method = ""class"", parms = list(split = ""gini""))
|
||||
"
|
||||
"1","
|
||||
"
|
||||
"1","Variables actually used in tree construction:
|
||||
"
|
||||
"1","[1]"
|
||||
"1"," Income"
|
||||
"1"," Price "
|
||||
"1","
|
||||
"
|
||||
"1","
|
||||
"
|
||||
"1","Root node error: "
|
||||
"1",""
|
||||
"1","260"
|
||||
"1",""
|
||||
"1","/"
|
||||
"1",""
|
||||
"1","600"
|
||||
"1",""
|
||||
"1"," = "
|
||||
"1",""
|
||||
"1","0.43333"
|
||||
"1",""
|
||||
"1","
|
||||
|
||||
"
|
||||
"1","n="
|
||||
"1"," "
|
||||
"1","600"
|
||||
"1"," "
|
||||
"1","
|
||||
|
||||
"
|
||||
"1"," "
|
||||
"1"," CP"
|
||||
"1"," nsplit"
|
||||
"1"," rel error"
|
||||
"1"," xerror"
|
||||
"1"," xstd"
|
||||
"1","
|
||||
1"
|
||||
"1"," 0.692308"
|
||||
"1"," 0"
|
||||
"1"," 1.00000"
|
||||
"1"," 1.00000"
|
||||
"1"," 0.046685"
|
||||
"1","
|
||||
2"
|
||||
"1"," 0.025000"
|
||||
"1"," 1"
|
||||
"1"," 0.30769"
|
||||
"1"," 0.31154"
|
||||
"1"," 0.032194"
|
||||
"1","
|
||||
3"
|
||||
"1"," 0.011538"
|
||||
"1"," 3"
|
||||
"1"," 0.25769"
|
||||
"1"," 0.26538"
|
||||
"1"," 0.030055"
|
||||
"1","
|
||||
"
|
||||
"0","rpart.plot(pruned_tree)"
|
||||
|
@ -0,0 +1 @@
|
||||
{"height":432.6328800988875,"width":700.0,"dpi":-1.0,"size_behavior":0,"conditions":[]}
|
||||
Binary file not shown.
|
After Width: | Height: | Size: 56 KiB |
Binary file not shown.
@ -0,0 +1,125 @@
|
||||
"0","pred_test = predict(tree, test_df, type=""class"")"
|
||||
"0","conf_mat_test = table(Actual = test_df$MYDEPV, Predicted = pred_test)"
|
||||
"0","conf_mat_test"
|
||||
"1"," Predicted
|
||||
"
|
||||
"1","Actual"
|
||||
"1"," 0"
|
||||
"1"," 1"
|
||||
"1","
|
||||
0"
|
||||
"1"," 76"
|
||||
"1"," 10"
|
||||
"1","
|
||||
1"
|
||||
"1"," 6"
|
||||
"1"," 58"
|
||||
"1","
|
||||
"
|
||||
"0","print(diag(conf_mat_test) / rowSums(conf_mat_test))"
|
||||
"1"," 0 "
|
||||
"1"," 1 "
|
||||
"1","
|
||||
"
|
||||
"1","0.8837209 "
|
||||
"1","0.9062500 "
|
||||
"1","
|
||||
"
|
||||
"0","tree_gini = rpart("
|
||||
"0"," MYDEPV ~ Price + Income + Age,"
|
||||
"0"," data = train_df,"
|
||||
"0"," method = ""class"","
|
||||
"0"," parms = list(split = ""gini"")"
|
||||
"0",")"
|
||||
"0",""
|
||||
"0","printcp(tree_gini)"
|
||||
"1","
|
||||
Classification tree:
|
||||
"
|
||||
"1","rpart(formula = MYDEPV ~ Price + Income + Age, data = train_df,
|
||||
"
|
||||
"1"," method = ""class"", parms = list(split = ""gini""))
|
||||
"
|
||||
"1","
|
||||
"
|
||||
"1","Variables actually used in tree construction:
|
||||
"
|
||||
"1","[1]"
|
||||
"1"," Age "
|
||||
"1"," Income"
|
||||
"1"," Price "
|
||||
"1","
|
||||
"
|
||||
"1","
|
||||
"
|
||||
"1","Root node error: "
|
||||
"1",""
|
||||
"1","260"
|
||||
"1",""
|
||||
"1","/"
|
||||
"1",""
|
||||
"1","600"
|
||||
"1",""
|
||||
"1"," = "
|
||||
"1",""
|
||||
"1","0.43333"
|
||||
"1",""
|
||||
"1","
|
||||
|
||||
"
|
||||
"1","n="
|
||||
"1"," "
|
||||
"1","600"
|
||||
"1"," "
|
||||
"1","
|
||||
|
||||
"
|
||||
"1"," "
|
||||
"1"," CP"
|
||||
"1"," nsplit"
|
||||
"1"," rel error"
|
||||
"1"," xerror"
|
||||
"1"," xstd"
|
||||
"1","
|
||||
1"
|
||||
"1"," 0.692308"
|
||||
"1"," 0"
|
||||
"1"," 1.00000"
|
||||
"1"," 1.00000"
|
||||
"1"," 0.046685"
|
||||
"1","
|
||||
2"
|
||||
"1"," 0.025000"
|
||||
"1"," 1"
|
||||
"1"," 0.30769"
|
||||
"1"," 0.31154"
|
||||
"1"," 0.032194"
|
||||
"1","
|
||||
3"
|
||||
"1"," 0.011538"
|
||||
"1"," 3"
|
||||
"1"," 0.25769"
|
||||
"1"," 0.26538"
|
||||
"1"," 0.030055"
|
||||
"1","
|
||||
4"
|
||||
"1"," 0.010256"
|
||||
"1"," 5"
|
||||
"1"," 0.23462"
|
||||
"1"," 0.28846"
|
||||
"1"," 0.031157"
|
||||
"1","
|
||||
5"
|
||||
"1"," 0.010000"
|
||||
"1"," 11"
|
||||
"1"," 0.17308"
|
||||
"1"," 0.28462"
|
||||
"1"," 0.030978"
|
||||
"1","
|
||||
"
|
||||
"0","rpart.plot("
|
||||
"0"," tree_gini,"
|
||||
"0"," type = 1,"
|
||||
"0"," extra = 106,"
|
||||
"0"," fallen.leaves = TRUE,"
|
||||
"0",")"
|
||||
|
@ -0,0 +1 @@
|
||||
{"height":432.6328800988875,"width":700.0,"dpi":-1.0,"size_behavior":0,"conditions":[]}
|
||||
Binary file not shown.
|
After Width: | Height: | Size: 78 KiB |
Binary file not shown.
@ -0,0 +1,7 @@
|
||||
"0","pred_prob = predict(tree, train_df, type=""prob"")[,2]"
|
||||
"0",""
|
||||
"0","library(ROCR)"
|
||||
"0","pred = prediction(pred_prob, train_df$MYDEPV)"
|
||||
"0","perf = performance(pred, ""tpr"", ""fpr"")"
|
||||
"0",""
|
||||
"0","plot(perf)"
|
||||
|
@ -0,0 +1 @@
|
||||
"0","abline(a = 0, b = 1)"
|
||||
|
@ -0,0 +1 @@
|
||||
{"height":432.6328800988875,"width":700.0,"dpi":-1.0,"size_behavior":0,"conditions":[]}
|
||||
Binary file not shown.
|
After Width: | Height: | Size: 20 KiB |
Binary file not shown.
@ -0,0 +1,7 @@
|
||||
"0",""
|
||||
"0","auc_perf = performance(pred, measure = ""auc"")"
|
||||
"0","auc_perf@y.values[[1]]"
|
||||
"1","[1]"
|
||||
"1"," 0.9720645"
|
||||
"1","
|
||||
"
|
||||
|
@ -0,0 +1,5 @@
|
||||
"0","setwd('/home/sek1ro/git/public/lab/ds/25-1/r')"
|
||||
"0","survey <- read.csv('survey.csv')"
|
||||
"0",""
|
||||
"0","train_df = survey[1:600,]"
|
||||
"0","test_df = survey[601:750,]"
|
||||
|
1
5/data science/r/.Rproj.user/shared/notebooks/paths
Normal file
1
5/data science/r/.Rproj.user/shared/notebooks/paths
Normal file
@ -0,0 +1 @@
|
||||
/home/sek1ro/git/public/lab/ds/25-1/r/9.Rmd="EB7B11F9"
|
||||
Reference in New Issue
Block a user