commit e52dde575a
parent 65218abfb1
2026-02-17 23:13:20 +03:00
429 changed files with 875 additions and 14 deletions


@ -0,0 +1,9 @@
{
"sortOrder": [
{
"columnIndex": 2,
"ascending": true
}
],
"path": "~/git/public/lab/ds/25-1/r"
}


@ -0,0 +1,7 @@
{
"installOptions": {
"installFromRepository": true,
"libraryPath": "/home/sek1ro/R/x86_64-pc-linux-gnu-library/4.5",
"installDependencies": true
}
}


@ -0,0 +1,3 @@
{
"activeTab": 0
}


@ -0,0 +1,14 @@
{
"left": {
"splitterpos": 453,
"topwindowstate": "NORMAL",
"panelheight": 1097,
"windowheight": 1135
},
"right": {
"splitterpos": 680,
"topwindowstate": "NORMAL",
"panelheight": 1097,
"windowheight": 1135
}
}


@ -0,0 +1,5 @@
{
"TabSet1": 0,
"TabSet2": 0,
"TabZoom": {}
}


@ -0,0 +1,5 @@


@ -0,0 +1 @@
{"active_set":"","sets":[]}


@ -0,0 +1,26 @@
{
"id": "36F8AE4B",
"path": "~/git/public/lab/ds/25-1/r/9.Rmd",
"project_path": "9.Rmd",
"type": "r_markdown",
"hash": "1911220946",
"contents": "",
"dirty": false,
"created": 1769447921680.0,
"source_on_save": false,
"relative_order": 1,
"properties": {
"source_window_id": "",
"Source": "Source",
"cursorPosition": "153,3",
"scrollLine": "154"
},
"folds": "",
"lastKnownWriteTime": 1769450242,
"encoding": "UTF-8",
"collab_server": "",
"source_window": "",
"last_content_update": 1769450242314,
"read_only": false,
"read_only_alternatives": []
}


@ -0,0 +1,161 @@
---
title: "Lab 9: Decision trees"
author: "Vladislav Litvinov <vlad@sek1ro>"
output:
  pdf_document:
    toc: TRUE
---
# Data preparation
```{r}
setwd('/home/sek1ro/git/public/lab/ds/25-1/r')
survey <- read.csv('survey.csv')
train_df = survey[1:600,]
test_df = survey[601:750,]
```
# Building a classification tree
The decision formula is MYDEPV ~ Price + Income + Age.
Use three-fold cross-validation and the information-gain splitting index.
Which features were actually used to construct the tree?
Plot the tree using the “rpart.plot” package.
Three-fold cross-validation splits the data into three parts (A, B, C) and makes three runs:

- Run 1: train on B + C, test on A
- Run 2: train on A + C, test on B
- Run 3: train on A + B, test on C

This yields three values of the chosen metric (accuracy, F1, MSE, etc.); their mean is the final estimate of model quality.
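The three runs above can be sketched by hand (an illustrative sketch only; `rpart` performs the equivalent resampling internally when `xval = 3`, and the round-robin fold assignment here is an assumption, not what rpart actually uses):

```r
# Manual 3-fold split (illustrative sketch; rpart's xval = 3 does the
# equivalent internally).
n <- 600                           # training-set size used in this lab
folds <- rep(1:3, length.out = n)  # fold label for every row
scores <- numeric(3)
for (k in 1:3) {
  train_idx <- which(folds != k)   # train on the other two folds
  test_idx  <- which(folds == k)   # evaluate on the held-out fold
  # fit the model on train_idx, score it on test_idx, store in scores[k]
}
# mean(scores) is the final quality estimate
```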
rpart drops a feature on its own if it never improves a split's information gain.

The CP table relates tree complexity to error:

- Root node error: the error with no splits at all
- nsplit: the number of splits
- rel error: the training error relative to the root
- xerror: the cross-validation error
- xstd: the standard deviation of xerror

Key rpart.plot arguments:

- type: where the split labels are placed
- extra: extra information shown in the nodes
- fallen.leaves: align the leaves at the bottom of the plot

Binary entropy: $H = -x\log(x) - (1 - x)\log(1 - x)$

Information gain: $Gain(A) = Info(S) - Info(S_A)$; splits are chosen to maximize it.

Two ways to keep a tree small:

1. Early stopping: limit the depth and require a minimum number of samples per node.
2. Pruning:
   - Build the full tree, in which every leaf contains samples of a single class.
   - Compute two measures: the relative accuracy of the model and the absolute error.
   - Remove the leaves and nodes whose removal hurts accuracy and increases error the least.
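The entropy and gain formulas above can be checked numerically (a small sketch with illustrative numbers, not the survey data; base-2 logarithms are used here so impurity is in bits):

```r
# Binary entropy H(x) = -x*log2(x) - (1-x)*log2(1-x), with 0*log(0) := 0
entropy <- function(x) {
  p <- c(x, 1 - x)
  p <- p[p > 0]          # drop zero probabilities: 0*log(0) is taken as 0
  -sum(p * log2(p))
}
entropy(0.5)  # 1 bit: maximal impurity for two classes
entropy(1.0)  # 0 bits: a pure node

# Information gain of a split: parent impurity minus weighted child impurity.
# Example: a 50/50 parent split into two equal children with 90/10 and 10/90.
gain <- entropy(0.5) - (0.5 * entropy(0.9) + 0.5 * entropy(0.1))
```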
```{r}
library(rpart)
tree = rpart(
  MYDEPV ~ Price + Income + Age,
  data = train_df,
  method = "class",
  parms = list(split = "information"),
  control = rpart.control(xval = 3)
)
printcp(tree)

library(rpart.plot)
rpart.plot(
  tree,
  type = 1,
  # extra = 106 is 100 + 6:
  #   6: class models, the probability of the second class only (useful for binary responses)
  #   100: display the percentage of observations in the node
  extra = 106,
  fallen.leaves = TRUE
)
```
Score the model with the training data and create the model's confusion matrix. Which class of MYDEPV was the model better able to classify?
```{r}
pred_class = predict(tree, train_df, type = "class")

conf_mat = table(
  Actual = train_df$MYDEPV,
  Predicted = pred_class
)
conf_mat

print(diag(conf_mat) / rowSums(conf_mat))
```
Define the resubstitution error rate, and then calculate it using the confusion matrix from the previous step. Is it a good indicator of predictive performance? Why or why not?
The resubstitution error rate is the fraction of incorrect predictions on the same data the model was trained on. It is not a good indicator of predictive performance: it is optimistically biased, because the tree was fitted to exactly these observations.
```{r}
print(1 - sum(diag(conf_mat)) / sum(conf_mat))
```
ROC curve (Receiver Operating Characteristic):

- x-axis: FPR = FP / (FP + TN)
- y-axis: TPR = TP / (TP + FN)
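Plugging in the counts of the training confusion matrix from the previous step (threshold 0.5) gives one point of the ROC curve:

```r
# One ROC point from the training confusion matrix (threshold 0.5)
TN <- 314; FP <- 26   # actual class 0
FN <- 19;  TP <- 241  # actual class 1
TPR <- TP / (TP + FN) # y-coordinate; equals the class-1 accuracy, ~0.927
FPR <- FP / (FP + TN) # x-coordinate; 1 minus the class-0 accuracy, ~0.076
```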
```{r}
pred_prob = predict(tree, train_df, type="prob")[,2]
library(ROCR)
pred = prediction(pred_prob, train_df$MYDEPV)
perf = performance(pred, "tpr", "fpr")
plot(perf)
abline(a = 0, b = 1)
auc_perf = performance(pred, measure = "auc")
auc_perf@y.values[[1]]
```
Score the model with the testing data. How accurate are the tree's predictions?
Repeat part (a), but set the splitting index to the Gini coefficient splitting index. How does the new tree compare to the previous one?
The Gini index measures how often a randomly chosen training sample would be misclassified if it were labeled at random according to the class distribution in the node.

$Gini(Q) = 1 - \sum_k p_k^2$; splits are chosen to minimize it (equivalently, to maximize the Gini decrease):

- 0: all samples belong to one class
- the maximum is reached when all classes are equally likely (0.5 for two classes)

For the binary case: $Gini = 1 - x^2 - (1 - x)^2$
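The binary form above is easy to tabulate (a quick numeric check with illustrative values):

```r
# Binary Gini impurity from the formula above
gini <- function(x) 1 - x^2 - (1 - x)^2
gini(0.5)  # 0.5: the binary maximum (both classes equally likely)
gini(1.0)  # 0: a pure node
gini(0.9)  # 0.18: mostly one class
```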
```{r}
pred_test = predict(tree, test_df, type = "class")
conf_mat_test = table(Actual = test_df$MYDEPV, Predicted = pred_test)
conf_mat_test
print(diag(conf_mat_test) / rowSums(conf_mat_test))

tree_gini = rpart(
  MYDEPV ~ Price + Income + Age,
  data = train_df,
  method = "class",
  parms = list(split = "gini")
)
printcp(tree_gini)

rpart.plot(
  tree_gini,
  type = 1,
  extra = 106,
  fallen.leaves = TRUE
)
```
One way to prune a tree is according to the complexity parameter associated with the smallest cross-validation error. Prune the new tree in this way using the “prune” function. Which features were actually used in the pruned tree? Why were certain variables not used?
```{r}
best_cp <- tree_gini$cptable[which.min(tree_gini$cptable[, "xerror"]), "CP"]
best_cp
pruned_tree = prune(tree_gini, cp = best_cp)
printcp(pruned_tree)
rpart.plot(pruned_tree)
```
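A common alternative to picking the minimum-`xerror` CP is the one-standard-error rule: take the simplest tree whose `xerror` is within one `xstd` of the minimum. A sketch on a toy `cptable` (the numbers mirror the Gini tree's CP table above; this selection code is a sketch, not an rpart function):

```r
# 1-SE rule sketch on a toy cptable (rows ordered simplest to most complex)
cptab <- data.frame(
  CP     = c(0.692308, 0.025000, 0.011538),
  xerror = c(1.00000, 0.31154, 0.26538),
  xstd   = c(0.046685, 0.032194, 0.030055)
)
i_min  <- which.min(cptab$xerror)
thresh <- cptab$xerror[i_min] + cptab$xstd[i_min]    # min xerror + one SE
cp_1se <- cptab$CP[which(cptab$xerror <= thresh)[1]] # first (simplest) row under it
```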
Create the confusion matrix for the new model, and compare the performance of the model before and after pruning.
```{r}
pruned_pred = predict(pruned_tree, test_df, type="class")
pruned_conf_mat = table(Actual = test_df$MYDEPV, Predicted = pruned_pred)
pruned_conf_mat
print(diag(pruned_conf_mat) / rowSums(pruned_conf_mat))
```


@ -0,0 +1,6 @@
{
"source_window_id": "",
"Source": "Source",
"cursorPosition": "153,3",
"scrollLine": "154"
}


@ -0,0 +1,7 @@
{
"tempName": "Untitled1",
"source_window_id": "",
"Source": "Source",
"cursorPosition": "28,0",
"scrollLine": "17"
}


@ -0,0 +1,2 @@
~%2Fgit%2Fpublic%2Flab%2Fds%2F25-1%2Fr%2F9.Rmd="231EDFBF"
~%2Fgit%2Fpublic%2Flab%2Fds%2F25-1%2Fr2%2F3.R="D80D5B6A"


@ -0,0 +1 @@
{"chunk_definitions":[{"row":14,"row_count":1,"visible":true,"expansion_state":0,"options":{"engine":"r","label":"unnamed-chunk-3","dev":"png"},"document_id":"36F8AE4B","chunk_id":"cw3y8fjmo2ayt","chunk_label":"unnamed-chunk-1"},{"row":77,"row_count":1,"visible":true,"expansion_state":0,"options":{"engine":"r","label":"unnamed-chunk-4","dev":"png"},"document_id":"36F8AE4B","chunk_id":"cgb1v2g83kknt","chunk_label":"unnamed-chunk-2"},{"row":89,"row_count":1,"visible":true,"expansion_state":0,"options":{"engine":"r","label":"unnamed-chunk-5","dev":"png"},"document_id":"36F8AE4B","chunk_id":"c3jleyvkqxnqm","chunk_label":"unnamed-chunk-3"},{"row":95,"row_count":1,"visible":true,"expansion_state":0,"options":{"engine":"r","label":"unnamed-chunk-6","dev":"png"},"document_id":"36F8AE4B","chunk_id":"c60fx7tj15bk5","chunk_label":"unnamed-chunk-4"},{"row":111,"row_count":1,"visible":true,"expansion_state":0,"options":{"engine":"r","label":"unnamed-chunk-7","dev":"png"},"document_id":"36F8AE4B","chunk_id":"csdwusaa8puvd","chunk_label":"unnamed-chunk-5"},{"row":142,"row_count":1,"visible":true,"expansion_state":0,"options":{"engine":"r","label":"unnamed-chunk-18","dev":"png"},"document_id":"36F8AE4B","chunk_id":"cr3h7jd3nr0ya","chunk_label":"unnamed-chunk-6"},{"row":153,"row_count":1,"visible":true,"expansion_state":0,"options":{"engine":"r","label":"unnamed-chunk-19","dev":"png"},"document_id":"36F8AE4B","chunk_id":"cpyo5ihaht7o1","chunk_label":"unnamed-chunk-7"},{"row":160,"row_count":1,"visible":true,"expansion_state":0,"options":{"engine":"r","label":"unnamed-chunk-20","dev":"png"},"document_id":"36F8AE4B","chunk_id":"cce5y7xzr9zk6","chunk_label":"unnamed-chunk-8"}],"doc_write_time":1769443515}


@ -0,0 +1,32 @@
pred_class = predict(tree, train_df, type="class")

conf_mat = table(
  Actual = train_df$MYDEPV,
  Predicted = pred_class
)

conf_mat
      Predicted
Actual   0   1
     0 314  26
     1  19 241
print(diag(conf_mat) / rowSums(conf_mat))
        0         1 
0.9235294 0.9269231 


@ -0,0 +1,5 @@
print(1 - sum(diag(conf_mat)) / sum(conf_mat))
[1] 0.075


@ -0,0 +1,27 @@
pruned_pred = predict(pruned_tree, test_df, type="class")
pruned_conf_mat = table(Actual = test_df$MYDEPV, Predicted = pruned_pred)
pruned_conf_mat
      Predicted
Actual  0  1
     0 82  4
     1 13 51
print(diag(pruned_conf_mat) / rowSums(pruned_conf_mat))
        0         1 
0.9534884 0.7968750 


@ -0,0 +1,107 @@
library(rpart)
tree = rpart(
  MYDEPV ~ Price + Income + Age,
  data = train_df,
  method = "class",
  parms = list(split = "information"),
  control = rpart.control(
    xval = 3,
  ),
)
printcp(tree)

Classification tree:
rpart(formula = MYDEPV ~ Price + Income + Age, data = train_df,
    method = "class", parms = list(split = "information"), control = rpart.control(xval = 3,
    ))

Variables actually used in tree construction:
[1] Age    Income Price 

Root node error: 260/600 = 0.43333

n= 600

        CP nsplit rel error  xerror     xstd
1 0.692308      0   1.00000 1.00000 0.046685
2 0.025000      1   0.30769 0.31154 0.032194
3 0.011538      3   0.25769 0.27308 0.030430
4 0.010256      5   0.23462 0.26923 0.030244
5 0.010000     11   0.17308 0.26923 0.030244

library(rpart.plot)

rpart.plot(
  tree,
  type = 1,
  extra = 106,
  #6 Class models: the probability of the second class only. Useful for binary responses.
  #100 display the percentage of observations in the node.
  fallen.leaves = TRUE,
)


@ -0,0 +1 @@
{"height":432.6328800988875,"width":700.0,"dpi":-1.0,"size_behavior":0,"conditions":[]}

Binary file not shown.



@ -0,0 +1 @@
{"chunk_definitions":[{"row":14,"row_count":1,"visible":true,"expansion_state":0,"options":{"engine":"r","label":"unnamed-chunk-3","dev":"png"},"document_id":"36F8AE4B","chunk_id":"cw3y8fjmo2ayt","chunk_label":"unnamed-chunk-1"},{"row":77,"row_count":1,"visible":true,"expansion_state":0,"options":{"engine":"r","label":"unnamed-chunk-4","dev":"png"},"document_id":"36F8AE4B","chunk_id":"cgb1v2g83kknt","chunk_label":"unnamed-chunk-2"},{"row":89,"row_count":1,"visible":true,"expansion_state":0,"options":{"engine":"r","label":"unnamed-chunk-5","dev":"png"},"document_id":"36F8AE4B","chunk_id":"c3jleyvkqxnqm","chunk_label":"unnamed-chunk-3"},{"row":95,"row_count":1,"visible":true,"expansion_state":0,"options":{"engine":"r","label":"unnamed-chunk-6","dev":"png"},"document_id":"36F8AE4B","chunk_id":"c60fx7tj15bk5","chunk_label":"unnamed-chunk-4"},{"row":111,"row_count":1,"visible":true,"expansion_state":0,"options":{"engine":"r","label":"unnamed-chunk-7","dev":"png"},"document_id":"36F8AE4B","chunk_id":"csdwusaa8puvd","chunk_label":"unnamed-chunk-5"},{"row":142,"row_count":1,"visible":true,"expansion_state":0,"options":{"engine":"r","label":"unnamed-chunk-18","dev":"png"},"document_id":"36F8AE4B","chunk_id":"cr3h7jd3nr0ya","chunk_label":"unnamed-chunk-6"},{"row":153,"row_count":1,"visible":true,"expansion_state":0,"options":{"engine":"r","label":"unnamed-chunk-19","dev":"png"},"document_id":"36F8AE4B","chunk_id":"cpyo5ihaht7o1","chunk_label":"unnamed-chunk-7"},{"row":160,"row_count":1,"visible":true,"expansion_state":0,"options":{"engine":"r","label":"unnamed-chunk-20","dev":"png"},"document_id":"36F8AE4B","chunk_id":"cce5y7xzr9zk6","chunk_label":"unnamed-chunk-8"}],"doc_write_time":1769443515}


@ -0,0 +1,79 @@
best_cp <- tree_gini$cptable[which.min(tree_gini$cptable[, "xerror"]), "CP"]
best_cp
[1] 0.01153846

pruned_tree = prune(tree_gini, cp = best_cp)

printcp(pruned_tree)

Classification tree:
rpart(formula = MYDEPV ~ Price + Income + Age, data = train_df,
    method = "class", parms = list(split = "gini"))

Variables actually used in tree construction:
[1] Income Price 

Root node error: 260/600 = 0.43333

n= 600

        CP nsplit rel error  xerror     xstd
1 0.692308      0   1.00000 1.00000 0.046685
2 0.025000      1   0.30769 0.31154 0.032194
3 0.011538      3   0.25769 0.26538 0.030055

rpart.plot(pruned_tree)


@ -0,0 +1 @@
{"height":432.6328800988875,"width":700.0,"dpi":-1.0,"size_behavior":0,"conditions":[]}

Binary file not shown.



@ -0,0 +1,125 @@
pred_test = predict(tree, test_df, type="class")
conf_mat_test = table(Actual = test_df$MYDEPV, Predicted = pred_test)
conf_mat_test
      Predicted
Actual  0  1
     0 76 10
     1  6 58
print(diag(conf_mat_test) / rowSums(conf_mat_test))
        0         1 
0.8837209 0.9062500 

tree_gini = rpart(
  MYDEPV ~ Price + Income + Age,
  data = train_df,
  method = "class",
  parms = list(split = "gini")
)

printcp(tree_gini)

Classification tree:
rpart(formula = MYDEPV ~ Price + Income + Age, data = train_df,
    method = "class", parms = list(split = "gini"))

Variables actually used in tree construction:
[1] Age    Income Price 

Root node error: 260/600 = 0.43333

n= 600

        CP nsplit rel error  xerror     xstd
1 0.692308      0   1.00000 1.00000 0.046685
2 0.025000      1   0.30769 0.31154 0.032194
3 0.011538      3   0.25769 0.26538 0.030055
4 0.010256      5   0.23462 0.28846 0.031157
5 0.010000     11   0.17308 0.28462 0.030978

rpart.plot(
  tree_gini,
  type = 1,
  extra = 106,
  fallen.leaves = TRUE,
)


@ -0,0 +1 @@
{"height":432.6328800988875,"width":700.0,"dpi":-1.0,"size_behavior":0,"conditions":[]}

Binary file not shown.



@ -0,0 +1,7 @@
pred_prob = predict(tree, train_df, type="prob")[,2]

library(ROCR)
pred = prediction(pred_prob, train_df$MYDEPV)
perf = performance(pred, "tpr", "fpr")

plot(perf)


@ -0,0 +1 @@
abline(a = 0, b = 1)


@ -0,0 +1 @@
{"height":432.6328800988875,"width":700.0,"dpi":-1.0,"size_behavior":0,"conditions":[]}

Binary file not shown.



@ -0,0 +1,7 @@
auc_perf = performance(pred, measure = "auc")
auc_perf@y.values[[1]]
[1] 0.9720645


@ -0,0 +1,5 @@
setwd('/home/sek1ro/git/public/lab/ds/25-1/r')
survey <- read.csv('survey.csv')

train_df = survey[1:600,]
test_df = survey[601:750,]


@ -0,0 +1 @@
/home/sek1ro/git/public/lab/ds/25-1/r/9.Rmd="EB7B11F9"