This commit is contained in:
2026-02-17 23:13:20 +03:00
parent 65218abfb1
commit e52dde575a
429 changed files with 875 additions and 14 deletions

View File

@ -0,0 +1 @@
{"chunk_definitions":[{"row":14,"row_count":1,"visible":true,"expansion_state":0,"options":{"engine":"r","label":"unnamed-chunk-3","dev":"png"},"document_id":"36F8AE4B","chunk_id":"cw3y8fjmo2ayt","chunk_label":"unnamed-chunk-1"},{"row":77,"row_count":1,"visible":true,"expansion_state":0,"options":{"engine":"r","label":"unnamed-chunk-4","dev":"png"},"document_id":"36F8AE4B","chunk_id":"cgb1v2g83kknt","chunk_label":"unnamed-chunk-2"},{"row":89,"row_count":1,"visible":true,"expansion_state":0,"options":{"engine":"r","label":"unnamed-chunk-5","dev":"png"},"document_id":"36F8AE4B","chunk_id":"c3jleyvkqxnqm","chunk_label":"unnamed-chunk-3"},{"row":95,"row_count":1,"visible":true,"expansion_state":0,"options":{"engine":"r","label":"unnamed-chunk-6","dev":"png"},"document_id":"36F8AE4B","chunk_id":"c60fx7tj15bk5","chunk_label":"unnamed-chunk-4"},{"row":111,"row_count":1,"visible":true,"expansion_state":0,"options":{"engine":"r","label":"unnamed-chunk-7","dev":"png"},"document_id":"36F8AE4B","chunk_id":"csdwusaa8puvd","chunk_label":"unnamed-chunk-5"},{"row":142,"row_count":1,"visible":true,"expansion_state":0,"options":{"engine":"r","label":"unnamed-chunk-18","dev":"png"},"document_id":"36F8AE4B","chunk_id":"cr3h7jd3nr0ya","chunk_label":"unnamed-chunk-6"},{"row":153,"row_count":1,"visible":true,"expansion_state":0,"options":{"engine":"r","label":"unnamed-chunk-19","dev":"png"},"document_id":"36F8AE4B","chunk_id":"cpyo5ihaht7o1","chunk_label":"unnamed-chunk-7"},{"row":160,"row_count":1,"visible":true,"expansion_state":0,"options":{"engine":"r","label":"unnamed-chunk-20","dev":"png"},"document_id":"36F8AE4B","chunk_id":"cce5y7xzr9zk6","chunk_label":"unnamed-chunk-8"}],"doc_write_time":1769443515}

View File

@ -0,0 +1,32 @@
"0","pred_class = predict(tree, train_df, type=""class"")"
"0",""
"0","conf_mat = table("
"0"," Actual = train_df$MYDEPV,"
"0"," Predicted = pred_class"
"0",")"
"0",""
"0","conf_mat"
"1"," Predicted
"
"1","Actual"
"1"," 0"
"1"," 1"
"1","
0"
"1"," 314"
"1"," 26"
"1","
1"
"1"," 19"
"1"," 241"
"1","
"
"0","print(diag(conf_mat) / rowSums(conf_mat))"
"1"," 0 "
"1"," 1 "
"1","
"
"1","0.9235294 "
"1","0.9269231 "
"1","
"
1 0 pred_class = predict(tree, train_df, type="class")
2 0
3 0 conf_mat = table(
4 0 Actual = train_df$MYDEPV,
5 0 Predicted = pred_class
6 0 )
7 0
8 0 conf_mat
9 1 Predicted
10 1 Actual
11 1 0
12 1 1
13 1 0
14 1 314
15 1 26
16 1 1
17 1 19
18 1 241
19 1
20 0 print(diag(conf_mat) / rowSums(conf_mat))
21 1 0
22 1 1
23 1
24 1 0.9235294
25 1 0.9269231
26 1

View File

@ -0,0 +1,5 @@
"0","print(1 - sum(diag(conf_mat)) / sum(conf_mat))"
"1","[1]"
"1"," 0.075"
"1","
"
1 0 print(1 - sum(diag(conf_mat)) / sum(conf_mat))
2 1 [1]
3 1 0.075
4 1

View File

@ -0,0 +1,27 @@
"0","pruned_pred = predict(pruned_tree, test_df, type=""class"")"
"0","pruned_conf_mat = table(Actual = test_df$MYDEPV, Predicted = pruned_pred)"
"0","pruned_conf_mat"
"1"," Predicted
"
"1","Actual"
"1"," 0"
"1"," 1"
"1","
0"
"1"," 82"
"1"," 4"
"1","
1"
"1"," 13"
"1"," 51"
"1","
"
"0","print(diag(pruned_conf_mat) / rowSums(pruned_conf_mat))"
"1"," 0 "
"1"," 1 "
"1","
"
"1","0.9534884 "
"1","0.7968750 "
"1","
"
1 0 pruned_pred = predict(pruned_tree, test_df, type="class")
2 0 pruned_conf_mat = table(Actual = test_df$MYDEPV, Predicted = pruned_pred)
3 0 pruned_conf_mat
4 1 Predicted
5 1 Actual
6 1 0
7 1 1
8 1 0
9 1 82
10 1 4
11 1 1
12 1 13
13 1 51
14 1
15 0 print(diag(pruned_conf_mat) / rowSums(pruned_conf_mat))
16 1 0
17 1 1
18 1
19 1 0.9534884
20 1 0.7968750
21 1

View File

@ -0,0 +1,107 @@
"0","library(rpart)"
"0","tree = rpart("
"0"," MYDEPV ~ Price + Income + Age,"
"0"," data = train_df,"
"0"," method = ""class"","
"0"," parms = list(split = ""information""),"
"0"," control = rpart.control("
"0"," xval = 3,"
"0"," ),"
"0",")"
"0","printcp(tree)"
"1","
Classification tree:
"
"1","rpart(formula = MYDEPV ~ Price + Income + Age, data = train_df,
"
"1"," method = ""class"", parms = list(split = ""information""), control = rpart.control(xval = 3,
"
"1"," ))
"
"1","
"
"1","Variables actually used in tree construction:
"
"1","[1]"
"1"," Age "
"1"," Income"
"1"," Price "
"1","
"
"1","
"
"1","Root node error: "
"1",""
"1","260"
"1",""
"1","/"
"1",""
"1","600"
"1",""
"1"," = "
"1",""
"1","0.43333"
"1",""
"1","
"
"1","n="
"1"," "
"1","600"
"1"," "
"1","
"
"1"," "
"1"," CP"
"1"," nsplit"
"1"," rel error"
"1"," xerror"
"1"," xstd"
"1","
1"
"1"," 0.692308"
"1"," 0"
"1"," 1.00000"
"1"," 1.00000"
"1"," 0.046685"
"1","
2"
"1"," 0.025000"
"1"," 1"
"1"," 0.30769"
"1"," 0.31154"
"1"," 0.032194"
"1","
3"
"1"," 0.011538"
"1"," 3"
"1"," 0.25769"
"1"," 0.27308"
"1"," 0.030430"
"1","
4"
"1"," 0.010256"
"1"," 5"
"1"," 0.23462"
"1"," 0.26923"
"1"," 0.030244"
"1","
5"
"1"," 0.010000"
"1"," 11"
"1"," 0.17308"
"1"," 0.26923"
"1"," 0.030244"
"1","
"
"0","library(rpart.plot)"
"0",""
"0","rpart.plot("
"0"," tree,"
"0"," type = 1,"
"0"," extra = 106,"
"0"," #6 Class models: the probability of the second class only. Useful for binary responses."
"0"," #100 display the percentage of observations in the node. "
"0"," fallen.leaves = TRUE,"
"0",")"
1 0 library(rpart)
2 0 tree = rpart(
3 0 MYDEPV ~ Price + Income + Age,
4 0 data = train_df,
5 0 method = "class",
6 0 parms = list(split = "information"),
7 0 control = rpart.control(
8 0 xval = 3,
9 0 ),
10 0 )
11 0 printcp(tree)
12 1 Classification tree:
13 1 rpart(formula = MYDEPV ~ Price + Income + Age, data = train_df,
14 1 method = "class", parms = list(split = "information"), control = rpart.control(xval = 3,
15 1 ))
16 1
17 1 Variables actually used in tree construction:
18 1 [1]
19 1 Age
20 1 Income
21 1 Price
22 1
23 1
24 1 Root node error:
25 1
26 1 260
27 1
28 1 /
29 1
30 1 600
31 1
32 1 =
33 1
34 1 0.43333
35 1
36 1
37 1 n=
38 1
39 1 600
40 1
41 1
42 1
43 1 CP
44 1 nsplit
45 1 rel error
46 1 xerror
47 1 xstd
48 1 1
49 1 0.692308
50 1 0
51 1 1.00000
52 1 1.00000
53 1 0.046685
54 1 2
55 1 0.025000
56 1 1
57 1 0.30769
58 1 0.31154
59 1 0.032194
60 1 3
61 1 0.011538
62 1 3
63 1 0.25769
64 1 0.27308
65 1 0.030430
66 1 4
67 1 0.010256
68 1 5
69 1 0.23462
70 1 0.26923
71 1 0.030244
72 1 5
73 1 0.010000
74 1 11
75 1 0.17308
76 1 0.26923
77 1 0.030244
78 1
79 0 library(rpart.plot)
80 0
81 0 rpart.plot(
82 0 tree,
83 0 type = 1,
84 0 extra = 106,
85 0 #6 Class models: the probability of the second class only. Useful for binary responses.
86 0 #100 display the percentage of observations in the node.
87 0 fallen.leaves = TRUE,
88 0 )

View File

@ -0,0 +1 @@
{"height":432.6328800988875,"width":700.0,"dpi":-1.0,"size_behavior":0,"conditions":[]}

Binary file not shown.

After

Width:  |  Height:  |  Size: 78 KiB

View File

@ -0,0 +1 @@
{"chunk_definitions":[{"row":14,"row_count":1,"visible":true,"expansion_state":0,"options":{"engine":"r","label":"unnamed-chunk-3","dev":"png"},"document_id":"36F8AE4B","chunk_id":"cw3y8fjmo2ayt","chunk_label":"unnamed-chunk-1"},{"row":77,"row_count":1,"visible":true,"expansion_state":0,"options":{"engine":"r","label":"unnamed-chunk-4","dev":"png"},"document_id":"36F8AE4B","chunk_id":"cgb1v2g83kknt","chunk_label":"unnamed-chunk-2"},{"row":89,"row_count":1,"visible":true,"expansion_state":0,"options":{"engine":"r","label":"unnamed-chunk-5","dev":"png"},"document_id":"36F8AE4B","chunk_id":"c3jleyvkqxnqm","chunk_label":"unnamed-chunk-3"},{"row":95,"row_count":1,"visible":true,"expansion_state":0,"options":{"engine":"r","label":"unnamed-chunk-6","dev":"png"},"document_id":"36F8AE4B","chunk_id":"c60fx7tj15bk5","chunk_label":"unnamed-chunk-4"},{"row":111,"row_count":1,"visible":true,"expansion_state":0,"options":{"engine":"r","label":"unnamed-chunk-7","dev":"png"},"document_id":"36F8AE4B","chunk_id":"csdwusaa8puvd","chunk_label":"unnamed-chunk-5"},{"row":142,"row_count":1,"visible":true,"expansion_state":0,"options":{"engine":"r","label":"unnamed-chunk-18","dev":"png"},"document_id":"36F8AE4B","chunk_id":"cr3h7jd3nr0ya","chunk_label":"unnamed-chunk-6"},{"row":153,"row_count":1,"visible":true,"expansion_state":0,"options":{"engine":"r","label":"unnamed-chunk-19","dev":"png"},"document_id":"36F8AE4B","chunk_id":"cpyo5ihaht7o1","chunk_label":"unnamed-chunk-7"},{"row":160,"row_count":1,"visible":true,"expansion_state":0,"options":{"engine":"r","label":"unnamed-chunk-20","dev":"png"},"document_id":"36F8AE4B","chunk_id":"cce5y7xzr9zk6","chunk_label":"unnamed-chunk-8"}],"doc_write_time":1769443515}

View File

@ -0,0 +1,79 @@
"0","best_cp <- tree_gini$cptable[which.min(tree_gini$cptable[, ""xerror""]), ""CP""]"
"0","best_cp"
"1","[1]"
"1"," 0.01153846"
"1","
"
"0","pruned_tree = prune(tree_gini, cp = best_cp)"
"0",""
"0","printcp(pruned_tree)"
"1","
Classification tree:
"
"1","rpart(formula = MYDEPV ~ Price + Income + Age, data = train_df,
"
"1"," method = ""class"", parms = list(split = ""gini""))
"
"1","
"
"1","Variables actually used in tree construction:
"
"1","[1]"
"1"," Income"
"1"," Price "
"1","
"
"1","
"
"1","Root node error: "
"1",""
"1","260"
"1",""
"1","/"
"1",""
"1","600"
"1",""
"1"," = "
"1",""
"1","0.43333"
"1",""
"1","
"
"1","n="
"1"," "
"1","600"
"1"," "
"1","
"
"1"," "
"1"," CP"
"1"," nsplit"
"1"," rel error"
"1"," xerror"
"1"," xstd"
"1","
1"
"1"," 0.692308"
"1"," 0"
"1"," 1.00000"
"1"," 1.00000"
"1"," 0.046685"
"1","
2"
"1"," 0.025000"
"1"," 1"
"1"," 0.30769"
"1"," 0.31154"
"1"," 0.032194"
"1","
3"
"1"," 0.011538"
"1"," 3"
"1"," 0.25769"
"1"," 0.26538"
"1"," 0.030055"
"1","
"
"0","rpart.plot(pruned_tree)"
1 0 best_cp <- tree_gini$cptable[which.min(tree_gini$cptable[, "xerror"]), "CP"]
2 0 best_cp
3 1 [1]
4 1 0.01153846
5 1
6 0 pruned_tree = prune(tree_gini, cp = best_cp)
7 0
8 0 printcp(pruned_tree)
9 1 Classification tree:
10 1 rpart(formula = MYDEPV ~ Price + Income + Age, data = train_df,
11 1 method = "class", parms = list(split = "gini"))
12 1
13 1 Variables actually used in tree construction:
14 1 [1]
15 1 Income
16 1 Price
17 1
18 1
19 1 Root node error:
20 1
21 1 260
22 1
23 1 /
24 1
25 1 600
26 1
27 1 =
28 1
29 1 0.43333
30 1
31 1
32 1 n=
33 1
34 1 600
35 1
36 1
37 1
38 1 CP
39 1 nsplit
40 1 rel error
41 1 xerror
42 1 xstd
43 1 1
44 1 0.692308
45 1 0
46 1 1.00000
47 1 1.00000
48 1 0.046685
49 1 2
50 1 0.025000
51 1 1
52 1 0.30769
53 1 0.31154
54 1 0.032194
55 1 3
56 1 0.011538
57 1 3
58 1 0.25769
59 1 0.26538
60 1 0.030055
61 1
62 0 rpart.plot(pruned_tree)

View File

@ -0,0 +1 @@
{"height":432.6328800988875,"width":700.0,"dpi":-1.0,"size_behavior":0,"conditions":[]}

Binary file not shown.

After

Width:  |  Height:  |  Size: 56 KiB

View File

@ -0,0 +1,125 @@
"0","pred_test = predict(tree, test_df, type=""class"")"
"0","conf_mat_test = table(Actual = test_df$MYDEPV, Predicted = pred_test)"
"0","conf_mat_test"
"1"," Predicted
"
"1","Actual"
"1"," 0"
"1"," 1"
"1","
0"
"1"," 76"
"1"," 10"
"1","
1"
"1"," 6"
"1"," 58"
"1","
"
"0","print(diag(conf_mat_test) / rowSums(conf_mat_test))"
"1"," 0 "
"1"," 1 "
"1","
"
"1","0.8837209 "
"1","0.9062500 "
"1","
"
"0","tree_gini = rpart("
"0"," MYDEPV ~ Price + Income + Age,"
"0"," data = train_df,"
"0"," method = ""class"","
"0"," parms = list(split = ""gini"")"
"0",")"
"0",""
"0","printcp(tree_gini)"
"1","
Classification tree:
"
"1","rpart(formula = MYDEPV ~ Price + Income + Age, data = train_df,
"
"1"," method = ""class"", parms = list(split = ""gini""))
"
"1","
"
"1","Variables actually used in tree construction:
"
"1","[1]"
"1"," Age "
"1"," Income"
"1"," Price "
"1","
"
"1","
"
"1","Root node error: "
"1",""
"1","260"
"1",""
"1","/"
"1",""
"1","600"
"1",""
"1"," = "
"1",""
"1","0.43333"
"1",""
"1","
"
"1","n="
"1"," "
"1","600"
"1"," "
"1","
"
"1"," "
"1"," CP"
"1"," nsplit"
"1"," rel error"
"1"," xerror"
"1"," xstd"
"1","
1"
"1"," 0.692308"
"1"," 0"
"1"," 1.00000"
"1"," 1.00000"
"1"," 0.046685"
"1","
2"
"1"," 0.025000"
"1"," 1"
"1"," 0.30769"
"1"," 0.31154"
"1"," 0.032194"
"1","
3"
"1"," 0.011538"
"1"," 3"
"1"," 0.25769"
"1"," 0.26538"
"1"," 0.030055"
"1","
4"
"1"," 0.010256"
"1"," 5"
"1"," 0.23462"
"1"," 0.28846"
"1"," 0.031157"
"1","
5"
"1"," 0.010000"
"1"," 11"
"1"," 0.17308"
"1"," 0.28462"
"1"," 0.030978"
"1","
"
"0","rpart.plot("
"0"," tree_gini,"
"0"," type = 1,"
"0"," extra = 106,"
"0"," fallen.leaves = TRUE,"
"0",")"
1 0 pred_test = predict(tree, test_df, type="class")
2 0 conf_mat_test = table(Actual = test_df$MYDEPV, Predicted = pred_test)
3 0 conf_mat_test
4 1 Predicted
5 1 Actual
6 1 0
7 1 1
8 1 0
9 1 76
10 1 10
11 1 1
12 1 6
13 1 58
14 1
15 0 print(diag(conf_mat_test) / rowSums(conf_mat_test))
16 1 0
17 1 1
18 1
19 1 0.8837209
20 1 0.9062500
21 1
22 0 tree_gini = rpart(
23 0 MYDEPV ~ Price + Income + Age,
24 0 data = train_df,
25 0 method = "class",
26 0 parms = list(split = "gini")
27 0 )
28 0
29 0 printcp(tree_gini)
30 1 Classification tree:
31 1 rpart(formula = MYDEPV ~ Price + Income + Age, data = train_df,
32 1 method = "class", parms = list(split = "gini"))
33 1
34 1 Variables actually used in tree construction:
35 1 [1]
36 1 Age
37 1 Income
38 1 Price
39 1
40 1
41 1 Root node error:
42 1
43 1 260
44 1
45 1 /
46 1
47 1 600
48 1
49 1 =
50 1
51 1 0.43333
52 1
53 1
54 1 n=
55 1
56 1 600
57 1
58 1
59 1
60 1 CP
61 1 nsplit
62 1 rel error
63 1 xerror
64 1 xstd
65 1 1
66 1 0.692308
67 1 0
68 1 1.00000
69 1 1.00000
70 1 0.046685
71 1 2
72 1 0.025000
73 1 1
74 1 0.30769
75 1 0.31154
76 1 0.032194
77 1 3
78 1 0.011538
79 1 3
80 1 0.25769
81 1 0.26538
82 1 0.030055
83 1 4
84 1 0.010256
85 1 5
86 1 0.23462
87 1 0.28846
88 1 0.031157
89 1 5
90 1 0.010000
91 1 11
92 1 0.17308
93 1 0.28462
94 1 0.030978
95 1
96 0 rpart.plot(
97 0 tree_gini,
98 0 type = 1,
99 0 extra = 106,
100 0 fallen.leaves = TRUE,
101 0 )

View File

@ -0,0 +1 @@
{"height":432.6328800988875,"width":700.0,"dpi":-1.0,"size_behavior":0,"conditions":[]}

Binary file not shown.

After

Width:  |  Height:  |  Size: 78 KiB

View File

@ -0,0 +1,7 @@
"0","pred_prob = predict(tree, train_df, type=""prob"")[,2]"
"0",""
"0","library(ROCR)"
"0","pred = prediction(pred_prob, train_df$MYDEPV)"
"0","perf = performance(pred, ""tpr"", ""fpr"")"
"0",""
"0","plot(perf)"
1 0 pred_prob = predict(tree, train_df, type="prob")[,2]
2 0
3 0 library(ROCR)
4 0 pred = prediction(pred_prob, train_df$MYDEPV)
5 0 perf = performance(pred, "tpr", "fpr")
6 0
7 0 plot(perf)

View File

@ -0,0 +1 @@
"0","abline(a = 0, b = 1)"
1 0 abline(a = 0, b = 1)

View File

@ -0,0 +1 @@
{"height":432.6328800988875,"width":700.0,"dpi":-1.0,"size_behavior":0,"conditions":[]}

Binary file not shown.

After

Width:  |  Height:  |  Size: 20 KiB

View File

@ -0,0 +1,7 @@
"0",""
"0","auc_perf = performance(pred, measure = ""auc"")"
"0","auc_perf@y.values[[1]]"
"1","[1]"
"1"," 0.9720645"
"1","
"
1 0
2 0 auc_perf = performance(pred, measure = "auc")
3 0 auc_perf@y.values[[1]]
4 1 [1]
5 1 0.9720645
6 1

View File

@ -0,0 +1,5 @@
"0","setwd('/home/sek1ro/git/public/lab/ds/25-1/r')"
"0","survey <- read.csv('survey.csv')"
"0",""
"0","train_df = survey[1:600,]"
"0","test_df = survey[601:750,]"
1 0 setwd('/home/sek1ro/git/public/lab/ds/25-1/r')
2 0 survey <- read.csv('survey.csv')
3 0
4 0 train_df = survey[1:600,]
5 0 test_df = survey[601:750,]