renaming
This commit is contained in:
BIN
5/data science/r/.RData
Normal file
BIN
5/data science/r/.RData
Normal file
Binary file not shown.
226
5/data science/r/.Rhistory
Normal file
226
5/data science/r/.Rhistory
Normal file
@ -0,0 +1,226 @@
|
||||
a <- 3
|
||||
v <- c(1,2,3,4,5)
|
||||
q()
|
||||
a <- 3
|
||||
v<-c(1,2,3,4,5)
|
||||
read.table("git/public/lab/ds/25-1/r2/zipIncome.txt")
|
||||
db = read.table("git/public/lab/ds/25-1/r2/zipIncome.txt")
|
||||
levels<-c(1,2,3)
|
||||
ratings<-c("a", "b", "c")
|
||||
f<-factor(ratings, levels)
|
||||
print(f)
|
||||
print(f.size())
|
||||
print(f)
|
||||
print(levels.default())
|
||||
print(levels[0])
|
||||
print(levels[1])
|
||||
print(ratings[1])
|
||||
print(ratings[2])
|
||||
print(ratings[0])
|
||||
print(ratings[4])
|
||||
print(ratings[3])
|
||||
levels<-("a", "b", "a", "c")
|
||||
levels<-("a", "b", "a", "c")
|
||||
levels<-c("a", "b", "a", "c")
|
||||
f<-factor(ratings, levels)
|
||||
tmp<-levels
|
||||
levels<-ratings
|
||||
ratings<-tmp
|
||||
remove(tmp)
|
||||
f<-factor(ratings, levels)
|
||||
f[0]
|
||||
f[1]
|
||||
f[2]
|
||||
f[3]
|
||||
f[4]
|
||||
f[5]
|
||||
pi
|
||||
e
|
||||
exp
|
||||
exp()
|
||||
v <- c(1:10)
|
||||
w <- c(15:24)
|
||||
x <- v
|
||||
x = v
|
||||
x <- v * w
|
||||
db[1:3]
|
||||
db[1:2]
|
||||
db[1]
|
||||
db[1,]
|
||||
db[1:2,]
|
||||
db[2,]
|
||||
db[1,]
|
||||
db[,1]
|
||||
db$state
|
||||
db
|
||||
db$V1
|
||||
db$q
|
||||
db[,1]
|
||||
db[1,]
|
||||
db$zip_prefixes
|
||||
db$name
|
||||
db["zip_prefixes"]
|
||||
df <- data.frame(name=c("a"))
|
||||
fd
|
||||
df
|
||||
df$name
|
||||
df[]
|
||||
df[1]
|
||||
df[,1]
|
||||
df[,2]
|
||||
df[1,]
|
||||
class(df)
|
||||
printd
|
||||
print(d)
|
||||
summary(x)
|
||||
var(x)
|
||||
sd(x)
|
||||
range(x)
|
||||
plot(x)
|
||||
hist(x)
|
||||
str(x)
|
||||
help(plot)
|
||||
setwd('/home/sek1ro/git/public/lab/ds/25-1/r')
|
||||
survey <- read.csv('survey.csv')
|
||||
head(survey)
|
||||
survey$price20 <- ifelse(survey$Price == 20, 1, 0)
|
||||
survey$price30 <- ifelse(survey$Price == 30, 1, 0)
|
||||
head(survey)
|
||||
survey$one <- 1
|
||||
model <- glm(
|
||||
MYDEPV ~ Income + Age + price20 + price30,
|
||||
binomial(link = "logit"),
|
||||
survey
|
||||
)
|
||||
summary(model)
|
||||
quantile(residuals(model))
|
||||
pred_test = predict(tree, test_df, type="class")
|
||||
setwd('/home/sek1ro/git/public/lab/ds/25-1/r')
|
||||
survey <- read.csv('survey.csv')
|
||||
train_df = survey[1:600,]
|
||||
test_df = survey[601:750,]
|
||||
setwd('/home/sek1ro/git/public/lab/ds/25-1/r')
|
||||
survey <- read.csv('survey.csv')
|
||||
train_df = survey[1:600,]
|
||||
test_df = survey[601:750,]
|
||||
library(rpart)
|
||||
tree = rpart(
|
||||
MYDEPV ~ Price + Income + Age,
|
||||
data = train_df,
|
||||
method = "class",
|
||||
parms = list(split = "information"),
|
||||
control = rpart.control(
|
||||
xval = 3,
|
||||
),
|
||||
)
|
||||
printcp(tree)
|
||||
library(rpart.plot)
|
||||
rpart.plot(
|
||||
tree,
|
||||
type = 1,
|
||||
extra = 106,
|
||||
#6 Class models: the probability of the second class only. Useful for binary responses.
|
||||
#100 display the percentage of observations in the node.
|
||||
fallen.leaves = TRUE,
|
||||
)
|
||||
pred_class = predict(tree, train_df, type="class")
|
||||
conf_mat = table(
|
||||
Actual = train_df$MYDEPV,
|
||||
Predicted = pred_class
|
||||
)
|
||||
conf_mat
|
||||
print(diag(conf_mat) / rowSums(conf_mat))
|
||||
print(1 - sum(diag(conf_mat)) / sum(conf_mat))
|
||||
pred_prob = predict(tree, train_df, type="prob")[,2]
|
||||
library(ROCR)
|
||||
pred = prediction(pred_prob, train_df$MYDEPV)
|
||||
perf = performance(pred, "tpr", "fpr")
|
||||
plot(perf)
|
||||
abline(a = 0, b = 1)
|
||||
auc_perf = performance(pred, measure = "auc")
|
||||
auc_perf@y.values[[1]]
|
||||
pred_test = predict(tree, test_df, type="class")
|
||||
conf_mat_test = table(Actual = test_df$MYDEPV, Predicted = pred_test)
|
||||
conf_mat_test
|
||||
print(diag(conf_mat) / rowSums(conf_mat))
|
||||
tree_gini = rpart(
|
||||
MYDEPV ~ Price + Income + Age,
|
||||
data = train_df,
|
||||
method = "class",
|
||||
parms = list(split = "gini")
|
||||
)
|
||||
printcp(tree_gini)
|
||||
rpart.plot(
|
||||
tree_gini,
|
||||
type = 1,
|
||||
extra = 106,
|
||||
fallen.leaves = TRUE,
|
||||
)
|
||||
best_cp <- tree$cptable[which.min(tree_dini$cptable[, "xerror"]), "CP"]
|
||||
best_cp <- tree_dini$cptable[which.min(tree_dini$cptable[, "xerror"]), "CP"]
|
||||
best_cp <- tree_gini$cptable[which.min(tree_gini$cptable[, "xerror"]), "CP"]
|
||||
best_cp
|
||||
best_cp <- tree_gini$cptable[which.min(tree_gini$cptable[, "xerror"]), "CP"]
|
||||
best_cp
|
||||
pruned_tree = prune(tree_gini, cp = best_cp)
|
||||
best_cp <- tree_gini$cptable[which.min(tree_gini$cptable[, "xerror"]), "CP"]
|
||||
best_cp
|
||||
pruned_tree = prune(tree_gini, cp = best_cp)
|
||||
printcp(pruned_tree)
|
||||
best_cp <- tree_gini$cptable[which.min(tree_gini$cptable[, "xerror"]), "CP"]
|
||||
best_cp
|
||||
pruned_tree = prune(tree_gini, cp = best_cp)
|
||||
printcp(pruned_tree)
|
||||
rpart.plot(pruned_tree)
|
||||
pred_test = predict(tree, test_df, type="class")
|
||||
conf_mat_test = table(Actual = test_df$MYDEPV, Predicted = pred_test)
|
||||
conf_mat_test
|
||||
print(diag(conf_mat_test) / rowSums(conf_mat_test))
|
||||
tree_gini = rpart(
|
||||
MYDEPV ~ Price + Income + Age,
|
||||
data = train_df,
|
||||
method = "class",
|
||||
parms = list(split = "gini")
|
||||
)
|
||||
printcp(tree_gini)
|
||||
rpart.plot(
|
||||
tree_gini,
|
||||
type = 1,
|
||||
extra = 106,
|
||||
fallen.leaves = TRUE,
|
||||
)
|
||||
best_cp <- tree_gini$cptable[which.min(tree_gini$cptable[, "xerror"]), "CP"]
|
||||
best_cp
|
||||
pruned_tree = prune(tree_gini, cp = best_cp)
|
||||
printcp(pruned_tree)
|
||||
rpart.plot(pruned_tree)
|
||||
pruned_pred = predict(pruned_tree, test_df, type="class")
|
||||
pruned_conf_mat = table(Actual = test_df$MYDEPV, Predicted = pruned_pred)
|
||||
pruned_conf_mat
|
||||
print(diag(pruned_conf_mat) / rowSums(pruned_conf_mat))
|
||||
pred_test = predict(tree, test_df, type="class")
|
||||
conf_mat_test = table(Actual = test_df$MYDEPV, Predicted = pred_test)
|
||||
conf_mat_test
|
||||
print(diag(conf_mat_test) / rowSums(conf_mat_test))
|
||||
tree_gini = rpart(
|
||||
MYDEPV ~ Price + Income + Age,
|
||||
data = train_df,
|
||||
method = "class",
|
||||
parms = list(split = "gini")
|
||||
)
|
||||
printcp(tree_gini)
|
||||
rpart.plot(
|
||||
tree_gini,
|
||||
type = 1,
|
||||
extra = 106,
|
||||
fallen.leaves = TRUE,
|
||||
)
|
||||
best_cp <- tree_gini$cptable[which.min(tree_gini$cptable[, "xerror"]), "CP"]
|
||||
best_cp
|
||||
pruned_tree = prune(tree_gini, cp = best_cp)
|
||||
printcp(pruned_tree)
|
||||
rpart.plot(pruned_tree)
|
||||
pruned_pred = predict(pruned_tree, test_df, type="class")
|
||||
pruned_conf_mat = table(Actual = test_df$MYDEPV, Predicted = pruned_pred)
|
||||
pruned_conf_mat
|
||||
print(diag(pruned_conf_mat) / rowSums(pruned_conf_mat))
|
||||
@ -0,0 +1,9 @@
|
||||
{
|
||||
"sortOrder": [
|
||||
{
|
||||
"columnIndex": 2,
|
||||
"ascending": true
|
||||
}
|
||||
],
|
||||
"path": "~/git/public/lab/ds/25-1/r"
|
||||
}
|
||||
@ -0,0 +1,7 @@
|
||||
{
|
||||
"installOptions": {
|
||||
"installFromRepository": true,
|
||||
"libraryPath": "/home/sek1ro/R/x86_64-pc-linux-gnu-library/4.5",
|
||||
"installDependencies": true
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,3 @@
|
||||
{
|
||||
"activeTab": 0
|
||||
}
|
||||
@ -0,0 +1,14 @@
|
||||
{
|
||||
"left": {
|
||||
"splitterpos": 453,
|
||||
"topwindowstate": "NORMAL",
|
||||
"panelheight": 1097,
|
||||
"windowheight": 1135
|
||||
},
|
||||
"right": {
|
||||
"splitterpos": 680,
|
||||
"topwindowstate": "NORMAL",
|
||||
"panelheight": 1097,
|
||||
"windowheight": 1135
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,5 @@
|
||||
{
|
||||
"TabSet1": 0,
|
||||
"TabSet2": 0,
|
||||
"TabZoom": {}
|
||||
}
|
||||
5
5/data science/r/.Rproj.user/C6239C96/rmd-outputs
Normal file
5
5/data science/r/.Rproj.user/C6239C96/rmd-outputs
Normal file
@ -0,0 +1,5 @@
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@ -0,0 +1 @@
|
||||
{"active_set":"","sets":[]}
|
||||
26
5/data science/r/.Rproj.user/C6239C96/sources/per/t/36F8AE4B
Normal file
26
5/data science/r/.Rproj.user/C6239C96/sources/per/t/36F8AE4B
Normal file
@ -0,0 +1,26 @@
|
||||
{
|
||||
"id": "36F8AE4B",
|
||||
"path": "~/git/public/lab/ds/25-1/r/9.Rmd",
|
||||
"project_path": "9.Rmd",
|
||||
"type": "r_markdown",
|
||||
"hash": "1911220946",
|
||||
"contents": "",
|
||||
"dirty": false,
|
||||
"created": 1769447921680.0,
|
||||
"source_on_save": false,
|
||||
"relative_order": 1,
|
||||
"properties": {
|
||||
"source_window_id": "",
|
||||
"Source": "Source",
|
||||
"cursorPosition": "153,3",
|
||||
"scrollLine": "154"
|
||||
},
|
||||
"folds": "",
|
||||
"lastKnownWriteTime": 1769450242,
|
||||
"encoding": "UTF-8",
|
||||
"collab_server": "",
|
||||
"source_window": "",
|
||||
"last_content_update": 1769450242314,
|
||||
"read_only": false,
|
||||
"read_only_alternatives": []
|
||||
}
|
||||
@ -0,0 +1,161 @@
|
||||
---
|
||||
title: "Lab9: Decision trees"
|
||||
author: "Vladislav Litvinov <vlad@sek1ro>"
|
||||
output:
|
||||
pdf_document:
|
||||
toc_float: TRUE
|
||||
---
|
||||
# Data preparation
|
||||
```{r}
|
||||
setwd('/home/sek1ro/git/public/lab/ds/25-1/r')
|
||||
survey <- read.csv('survey.csv')
|
||||
|
||||
train_df = survey[1:600,]
|
||||
test_df = survey[601:750,]
|
||||
```
|
||||
# Building classification tree
|
||||
decision formula is MYDEPV ~ Price + Income + Age
|
||||
|
||||
Use three-fold cross-validation and the information gain splitting index
|
||||
Which features were actually used to construct the tree?
|
||||
Plot the tree using the “rpart.plot” package.
|
||||
|
||||
Three-fold cross-validation - Делают 3 прогона:
|
||||
Прогон 1: обучаемся на B + C, тестируем на A
|
||||
Прогон 2: обучаемся на A + C, тестируем на B
|
||||
Прогон 3: обучаемся на A + B, тестируем на C
|
||||
|
||||
Получаем 3 значения метрики (accuracy, F1, MSE и т.п.).
|
||||
Берём среднее значение — это и есть итоговая оценка качества модели.
|
||||
|
||||
rpart сам отбрасывает признаки, если они не улучшают разбиение по information gain.
|
||||
|
||||
CP-table - связь сложности дерева и ошибки
|
||||
Root node error — ошибка без разбиений
|
||||
nsplit — число split-ов
|
||||
rel error — обучающая ошибка относительно корня
|
||||
xerror — ошибка по cross-validation
|
||||
xstd — стандартное отклонение xerror
|
||||
|
||||
type — расположение split-ов
|
||||
extra — доп. информация в узлах
|
||||
fallen.leaves — выравнивание листьев
|
||||
|
||||
H = -x\cdot\log\left(x\right)-\left(1-x\right)\log\left(1-x\right)
|
||||
Gain(A) = Info(S) - Info(S_A) - максимизируем
|
||||
|
||||
Ранняя остановка. Ограничение грубины. Минимальное количество примеров в узле.
|
||||
|
||||
Отсечение ветвей.
|
||||
Строительство полного дерева, в котором листья содержат примеры одного класса.
|
||||
Определение двух показателей: относительную точность модели и абсолютную ошибку.
|
||||
Удаление листов и узлов, потеря которых минимально скажется на точности модели и увеличении ошибки.
|
||||
|
||||
|
||||
```{r}
|
||||
library(rpart)
|
||||
tree = rpart(
|
||||
MYDEPV ~ Price + Income + Age,
|
||||
data = train_df,
|
||||
method = "class",
|
||||
parms = list(split = "information"),
|
||||
control = rpart.control(
|
||||
xval = 3,
|
||||
),
|
||||
)
|
||||
printcp(tree)
|
||||
|
||||
library(rpart.plot)
|
||||
|
||||
rpart.plot(
|
||||
tree,
|
||||
type = 1,
|
||||
extra = 106,
|
||||
#6 Class models: the probability of the second class only. Useful for binary responses.
|
||||
#100 display the percentage of observations in the node.
|
||||
fallen.leaves = TRUE,
|
||||
)
|
||||
```
|
||||
Score the model with the training data and create the model’s confusion matrix. Which class of MYDEPV was the model better able to classify?
|
||||
```{r}
|
||||
pred_class = predict(tree, train_df, type="class")
|
||||
|
||||
conf_mat = table(
|
||||
Actual = train_df$MYDEPV,
|
||||
Predicted = pred_class
|
||||
)
|
||||
|
||||
conf_mat
|
||||
print(diag(conf_mat) / rowSums(conf_mat))
|
||||
```
|
||||
Define the resubstitution error rate, and then calculate it using the confusion matrix from the previous step. Is it a good indicator of predictive performance? Why or why not?
|
||||
|
||||
Resubstitution error rate — это доля неправильных предсказаний на тех же данных, на которых обучалась модель
|
||||
```{r}
|
||||
print(1 - sum(diag(conf_mat)) / sum(conf_mat))
|
||||
```
|
||||
ROC curve - Receiver Operating Characteristic
|
||||
x - FPR = FP / (FP + TN)
|
||||
y - TPR = TP / (TP + FN)
|
||||
```{r}
|
||||
pred_prob = predict(tree, train_df, type="prob")[,2]
|
||||
|
||||
library(ROCR)
|
||||
pred = prediction(pred_prob, train_df$MYDEPV)
|
||||
perf = performance(pred, "tpr", "fpr")
|
||||
|
||||
plot(perf)
|
||||
abline(a = 0, b = 1)
|
||||
|
||||
auc_perf = performance(pred, measure = "auc")
|
||||
auc_perf@y.values[[1]]
|
||||
```
|
||||
Score the model with the testing data. How accurate are the tree’s predictions?
|
||||
Repeat part (a), but set the splitting index to the Gini coefficient splitting index. How does the new tree compare to the previous one?
|
||||
|
||||
индекс Джини показывает, как часто случайно выбранный пример обучающего множества будет распознан неправильно.
|
||||
|
||||
Gini(Q) = 1 - sum(p^2) - максимизируем
|
||||
0 - все к 1 классу
|
||||
1 - все равновероятны
|
||||
1-\ x^{2}\ -\ \left(1-x\right)^{2}
|
||||
```{r}
|
||||
pred_test = predict(tree, test_df, type="class")
|
||||
conf_mat_test = table(Actual = test_df$MYDEPV, Predicted = pred_test)
|
||||
conf_mat_test
|
||||
print(diag(conf_mat_test) / rowSums(conf_mat_test))
|
||||
|
||||
tree_gini = rpart(
|
||||
MYDEPV ~ Price + Income + Age,
|
||||
data = train_df,
|
||||
method = "class",
|
||||
parms = list(split = "gini")
|
||||
)
|
||||
|
||||
printcp(tree_gini)
|
||||
|
||||
rpart.plot(
|
||||
tree_gini,
|
||||
type = 1,
|
||||
extra = 106,
|
||||
fallen.leaves = TRUE,
|
||||
)
|
||||
```
|
||||
One way to prune a tree is according to the complexity parameter associated with the smallest cross-validation error. Prune the new tree in this way using the “prune” function. Which features were actually used in the pruned tree? Why were certain variables not used?
|
||||
```{r}
|
||||
best_cp <- tree_gini$cptable[which.min(tree_gini$cptable[, "xerror"]), "CP"]
|
||||
best_cp
|
||||
|
||||
pruned_tree = prune(tree_gini, cp = best_cp)
|
||||
|
||||
printcp(pruned_tree)
|
||||
|
||||
rpart.plot(pruned_tree)
|
||||
```
|
||||
Create the confusion matrix for the new model, and compare the performance of the model before and after pruning.
|
||||
```{r}
|
||||
pruned_pred = predict(pruned_tree, test_df, type="class")
|
||||
pruned_conf_mat = table(Actual = test_df$MYDEPV, Predicted = pruned_pred)
|
||||
pruned_conf_mat
|
||||
print(diag(pruned_conf_mat) / rowSums(pruned_conf_mat))
|
||||
```
|
||||
@ -0,0 +1,6 @@
|
||||
{
|
||||
"source_window_id": "",
|
||||
"Source": "Source",
|
||||
"cursorPosition": "153,3",
|
||||
"scrollLine": "154"
|
||||
}
|
||||
@ -0,0 +1,7 @@
|
||||
{
|
||||
"tempName": "Untitled1",
|
||||
"source_window_id": "",
|
||||
"Source": "Source",
|
||||
"cursorPosition": "28,0",
|
||||
"scrollLine": "17"
|
||||
}
|
||||
2
5/data science/r/.Rproj.user/C6239C96/sources/prop/INDEX
Normal file
2
5/data science/r/.Rproj.user/C6239C96/sources/prop/INDEX
Normal file
@ -0,0 +1,2 @@
|
||||
~%2Fgit%2Fpublic%2Flab%2Fds%2F25-1%2Fr%2F9.Rmd="231EDFBF"
|
||||
~%2Fgit%2Fpublic%2Flab%2Fds%2F25-1%2Fr2%2F3.R="D80D5B6A"
|
||||
@ -0,0 +1 @@
|
||||
{"chunk_definitions":[{"row":14,"row_count":1,"visible":true,"expansion_state":0,"options":{"engine":"r","label":"unnamed-chunk-3","dev":"png"},"document_id":"36F8AE4B","chunk_id":"cw3y8fjmo2ayt","chunk_label":"unnamed-chunk-1"},{"row":77,"row_count":1,"visible":true,"expansion_state":0,"options":{"engine":"r","label":"unnamed-chunk-4","dev":"png"},"document_id":"36F8AE4B","chunk_id":"cgb1v2g83kknt","chunk_label":"unnamed-chunk-2"},{"row":89,"row_count":1,"visible":true,"expansion_state":0,"options":{"engine":"r","label":"unnamed-chunk-5","dev":"png"},"document_id":"36F8AE4B","chunk_id":"c3jleyvkqxnqm","chunk_label":"unnamed-chunk-3"},{"row":95,"row_count":1,"visible":true,"expansion_state":0,"options":{"engine":"r","label":"unnamed-chunk-6","dev":"png"},"document_id":"36F8AE4B","chunk_id":"c60fx7tj15bk5","chunk_label":"unnamed-chunk-4"},{"row":111,"row_count":1,"visible":true,"expansion_state":0,"options":{"engine":"r","label":"unnamed-chunk-7","dev":"png"},"document_id":"36F8AE4B","chunk_id":"csdwusaa8puvd","chunk_label":"unnamed-chunk-5"},{"row":142,"row_count":1,"visible":true,"expansion_state":0,"options":{"engine":"r","label":"unnamed-chunk-18","dev":"png"},"document_id":"36F8AE4B","chunk_id":"cr3h7jd3nr0ya","chunk_label":"unnamed-chunk-6"},{"row":153,"row_count":1,"visible":true,"expansion_state":0,"options":{"engine":"r","label":"unnamed-chunk-19","dev":"png"},"document_id":"36F8AE4B","chunk_id":"cpyo5ihaht7o1","chunk_label":"unnamed-chunk-7"},{"row":160,"row_count":1,"visible":true,"expansion_state":0,"options":{"engine":"r","label":"unnamed-chunk-20","dev":"png"},"document_id":"36F8AE4B","chunk_id":"cce5y7xzr9zk6","chunk_label":"unnamed-chunk-8"}],"doc_write_time":1769443515}
|
||||
@ -0,0 +1,32 @@
|
||||
"0","pred_class = predict(tree, train_df, type=""class"")"
|
||||
"0",""
|
||||
"0","conf_mat = table("
|
||||
"0"," Actual = train_df$MYDEPV,"
|
||||
"0"," Predicted = pred_class"
|
||||
"0",")"
|
||||
"0",""
|
||||
"0","conf_mat"
|
||||
"1"," Predicted
|
||||
"
|
||||
"1","Actual"
|
||||
"1"," 0"
|
||||
"1"," 1"
|
||||
"1","
|
||||
0"
|
||||
"1"," 314"
|
||||
"1"," 26"
|
||||
"1","
|
||||
1"
|
||||
"1"," 19"
|
||||
"1"," 241"
|
||||
"1","
|
||||
"
|
||||
"0","print(diag(conf_mat) / rowSums(conf_mat))"
|
||||
"1"," 0 "
|
||||
"1"," 1 "
|
||||
"1","
|
||||
"
|
||||
"1","0.9235294 "
|
||||
"1","0.9269231 "
|
||||
"1","
|
||||
"
|
||||
|
@ -0,0 +1,5 @@
|
||||
"0","print(1 - sum(diag(conf_mat)) / sum(conf_mat))"
|
||||
"1","[1]"
|
||||
"1"," 0.075"
|
||||
"1","
|
||||
"
|
||||
|
@ -0,0 +1,27 @@
|
||||
"0","pruned_pred = predict(pruned_tree, test_df, type=""class"")"
|
||||
"0","pruned_conf_mat = table(Actual = test_df$MYDEPV, Predicted = pruned_pred)"
|
||||
"0","pruned_conf_mat"
|
||||
"1"," Predicted
|
||||
"
|
||||
"1","Actual"
|
||||
"1"," 0"
|
||||
"1"," 1"
|
||||
"1","
|
||||
0"
|
||||
"1"," 82"
|
||||
"1"," 4"
|
||||
"1","
|
||||
1"
|
||||
"1"," 13"
|
||||
"1"," 51"
|
||||
"1","
|
||||
"
|
||||
"0","print(diag(pruned_conf_mat) / rowSums(pruned_conf_mat))"
|
||||
"1"," 0 "
|
||||
"1"," 1 "
|
||||
"1","
|
||||
"
|
||||
"1","0.9534884 "
|
||||
"1","0.7968750 "
|
||||
"1","
|
||||
"
|
||||
|
@ -0,0 +1,107 @@
|
||||
"0","library(rpart)"
|
||||
"0","tree = rpart("
|
||||
"0"," MYDEPV ~ Price + Income + Age,"
|
||||
"0"," data = train_df,"
|
||||
"0"," method = ""class"","
|
||||
"0"," parms = list(split = ""information""),"
|
||||
"0"," control = rpart.control("
|
||||
"0"," xval = 3,"
|
||||
"0"," ),"
|
||||
"0",")"
|
||||
"0","printcp(tree)"
|
||||
"1","
|
||||
Classification tree:
|
||||
"
|
||||
"1","rpart(formula = MYDEPV ~ Price + Income + Age, data = train_df,
|
||||
"
|
||||
"1"," method = ""class"", parms = list(split = ""information""), control = rpart.control(xval = 3,
|
||||
"
|
||||
"1"," ))
|
||||
"
|
||||
"1","
|
||||
"
|
||||
"1","Variables actually used in tree construction:
|
||||
"
|
||||
"1","[1]"
|
||||
"1"," Age "
|
||||
"1"," Income"
|
||||
"1"," Price "
|
||||
"1","
|
||||
"
|
||||
"1","
|
||||
"
|
||||
"1","Root node error: "
|
||||
"1",""
|
||||
"1","260"
|
||||
"1",""
|
||||
"1","/"
|
||||
"1",""
|
||||
"1","600"
|
||||
"1",""
|
||||
"1"," = "
|
||||
"1",""
|
||||
"1","0.43333"
|
||||
"1",""
|
||||
"1","
|
||||
|
||||
"
|
||||
"1","n="
|
||||
"1"," "
|
||||
"1","600"
|
||||
"1"," "
|
||||
"1","
|
||||
|
||||
"
|
||||
"1"," "
|
||||
"1"," CP"
|
||||
"1"," nsplit"
|
||||
"1"," rel error"
|
||||
"1"," xerror"
|
||||
"1"," xstd"
|
||||
"1","
|
||||
1"
|
||||
"1"," 0.692308"
|
||||
"1"," 0"
|
||||
"1"," 1.00000"
|
||||
"1"," 1.00000"
|
||||
"1"," 0.046685"
|
||||
"1","
|
||||
2"
|
||||
"1"," 0.025000"
|
||||
"1"," 1"
|
||||
"1"," 0.30769"
|
||||
"1"," 0.31154"
|
||||
"1"," 0.032194"
|
||||
"1","
|
||||
3"
|
||||
"1"," 0.011538"
|
||||
"1"," 3"
|
||||
"1"," 0.25769"
|
||||
"1"," 0.27308"
|
||||
"1"," 0.030430"
|
||||
"1","
|
||||
4"
|
||||
"1"," 0.010256"
|
||||
"1"," 5"
|
||||
"1"," 0.23462"
|
||||
"1"," 0.26923"
|
||||
"1"," 0.030244"
|
||||
"1","
|
||||
5"
|
||||
"1"," 0.010000"
|
||||
"1"," 11"
|
||||
"1"," 0.17308"
|
||||
"1"," 0.26923"
|
||||
"1"," 0.030244"
|
||||
"1","
|
||||
"
|
||||
"0","library(rpart.plot)"
|
||||
"0",""
|
||||
"0","rpart.plot("
|
||||
"0"," tree,"
|
||||
"0"," type = 1,"
|
||||
"0"," extra = 106,"
|
||||
"0"," #6 Class models: the probability of the second class only. Useful for binary responses."
|
||||
"0"," #100 display the percentage of observations in the node. "
|
||||
"0"," fallen.leaves = TRUE,"
|
||||
"0",")"
|
||||
|
@ -0,0 +1 @@
|
||||
{"height":432.6328800988875,"width":700.0,"dpi":-1.0,"size_behavior":0,"conditions":[]}
|
||||
Binary file not shown.
|
After Width: | Height: | Size: 78 KiB |
Binary file not shown.
@ -0,0 +1 @@
|
||||
{"chunk_definitions":[{"row":14,"row_count":1,"visible":true,"expansion_state":0,"options":{"engine":"r","label":"unnamed-chunk-3","dev":"png"},"document_id":"36F8AE4B","chunk_id":"cw3y8fjmo2ayt","chunk_label":"unnamed-chunk-1"},{"row":77,"row_count":1,"visible":true,"expansion_state":0,"options":{"engine":"r","label":"unnamed-chunk-4","dev":"png"},"document_id":"36F8AE4B","chunk_id":"cgb1v2g83kknt","chunk_label":"unnamed-chunk-2"},{"row":89,"row_count":1,"visible":true,"expansion_state":0,"options":{"engine":"r","label":"unnamed-chunk-5","dev":"png"},"document_id":"36F8AE4B","chunk_id":"c3jleyvkqxnqm","chunk_label":"unnamed-chunk-3"},{"row":95,"row_count":1,"visible":true,"expansion_state":0,"options":{"engine":"r","label":"unnamed-chunk-6","dev":"png"},"document_id":"36F8AE4B","chunk_id":"c60fx7tj15bk5","chunk_label":"unnamed-chunk-4"},{"row":111,"row_count":1,"visible":true,"expansion_state":0,"options":{"engine":"r","label":"unnamed-chunk-7","dev":"png"},"document_id":"36F8AE4B","chunk_id":"csdwusaa8puvd","chunk_label":"unnamed-chunk-5"},{"row":142,"row_count":1,"visible":true,"expansion_state":0,"options":{"engine":"r","label":"unnamed-chunk-18","dev":"png"},"document_id":"36F8AE4B","chunk_id":"cr3h7jd3nr0ya","chunk_label":"unnamed-chunk-6"},{"row":153,"row_count":1,"visible":true,"expansion_state":0,"options":{"engine":"r","label":"unnamed-chunk-19","dev":"png"},"document_id":"36F8AE4B","chunk_id":"cpyo5ihaht7o1","chunk_label":"unnamed-chunk-7"},{"row":160,"row_count":1,"visible":true,"expansion_state":0,"options":{"engine":"r","label":"unnamed-chunk-20","dev":"png"},"document_id":"36F8AE4B","chunk_id":"cce5y7xzr9zk6","chunk_label":"unnamed-chunk-8"}],"doc_write_time":1769443515}
|
||||
@ -0,0 +1,79 @@
|
||||
"0","best_cp <- tree_gini$cptable[which.min(tree_gini$cptable[, ""xerror""]), ""CP""]"
|
||||
"0","best_cp"
|
||||
"1","[1]"
|
||||
"1"," 0.01153846"
|
||||
"1","
|
||||
"
|
||||
"0","pruned_tree = prune(tree_gini, cp = best_cp)"
|
||||
"0",""
|
||||
"0","printcp(pruned_tree)"
|
||||
"1","
|
||||
Classification tree:
|
||||
"
|
||||
"1","rpart(formula = MYDEPV ~ Price + Income + Age, data = train_df,
|
||||
"
|
||||
"1"," method = ""class"", parms = list(split = ""gini""))
|
||||
"
|
||||
"1","
|
||||
"
|
||||
"1","Variables actually used in tree construction:
|
||||
"
|
||||
"1","[1]"
|
||||
"1"," Income"
|
||||
"1"," Price "
|
||||
"1","
|
||||
"
|
||||
"1","
|
||||
"
|
||||
"1","Root node error: "
|
||||
"1",""
|
||||
"1","260"
|
||||
"1",""
|
||||
"1","/"
|
||||
"1",""
|
||||
"1","600"
|
||||
"1",""
|
||||
"1"," = "
|
||||
"1",""
|
||||
"1","0.43333"
|
||||
"1",""
|
||||
"1","
|
||||
|
||||
"
|
||||
"1","n="
|
||||
"1"," "
|
||||
"1","600"
|
||||
"1"," "
|
||||
"1","
|
||||
|
||||
"
|
||||
"1"," "
|
||||
"1"," CP"
|
||||
"1"," nsplit"
|
||||
"1"," rel error"
|
||||
"1"," xerror"
|
||||
"1"," xstd"
|
||||
"1","
|
||||
1"
|
||||
"1"," 0.692308"
|
||||
"1"," 0"
|
||||
"1"," 1.00000"
|
||||
"1"," 1.00000"
|
||||
"1"," 0.046685"
|
||||
"1","
|
||||
2"
|
||||
"1"," 0.025000"
|
||||
"1"," 1"
|
||||
"1"," 0.30769"
|
||||
"1"," 0.31154"
|
||||
"1"," 0.032194"
|
||||
"1","
|
||||
3"
|
||||
"1"," 0.011538"
|
||||
"1"," 3"
|
||||
"1"," 0.25769"
|
||||
"1"," 0.26538"
|
||||
"1"," 0.030055"
|
||||
"1","
|
||||
"
|
||||
"0","rpart.plot(pruned_tree)"
|
||||
|
@ -0,0 +1 @@
|
||||
{"height":432.6328800988875,"width":700.0,"dpi":-1.0,"size_behavior":0,"conditions":[]}
|
||||
Binary file not shown.
|
After Width: | Height: | Size: 56 KiB |
Binary file not shown.
@ -0,0 +1,125 @@
|
||||
"0","pred_test = predict(tree, test_df, type=""class"")"
|
||||
"0","conf_mat_test = table(Actual = test_df$MYDEPV, Predicted = pred_test)"
|
||||
"0","conf_mat_test"
|
||||
"1"," Predicted
|
||||
"
|
||||
"1","Actual"
|
||||
"1"," 0"
|
||||
"1"," 1"
|
||||
"1","
|
||||
0"
|
||||
"1"," 76"
|
||||
"1"," 10"
|
||||
"1","
|
||||
1"
|
||||
"1"," 6"
|
||||
"1"," 58"
|
||||
"1","
|
||||
"
|
||||
"0","print(diag(conf_mat_test) / rowSums(conf_mat_test))"
|
||||
"1"," 0 "
|
||||
"1"," 1 "
|
||||
"1","
|
||||
"
|
||||
"1","0.8837209 "
|
||||
"1","0.9062500 "
|
||||
"1","
|
||||
"
|
||||
"0","tree_gini = rpart("
|
||||
"0"," MYDEPV ~ Price + Income + Age,"
|
||||
"0"," data = train_df,"
|
||||
"0"," method = ""class"","
|
||||
"0"," parms = list(split = ""gini"")"
|
||||
"0",")"
|
||||
"0",""
|
||||
"0","printcp(tree_gini)"
|
||||
"1","
|
||||
Classification tree:
|
||||
"
|
||||
"1","rpart(formula = MYDEPV ~ Price + Income + Age, data = train_df,
|
||||
"
|
||||
"1"," method = ""class"", parms = list(split = ""gini""))
|
||||
"
|
||||
"1","
|
||||
"
|
||||
"1","Variables actually used in tree construction:
|
||||
"
|
||||
"1","[1]"
|
||||
"1"," Age "
|
||||
"1"," Income"
|
||||
"1"," Price "
|
||||
"1","
|
||||
"
|
||||
"1","
|
||||
"
|
||||
"1","Root node error: "
|
||||
"1",""
|
||||
"1","260"
|
||||
"1",""
|
||||
"1","/"
|
||||
"1",""
|
||||
"1","600"
|
||||
"1",""
|
||||
"1"," = "
|
||||
"1",""
|
||||
"1","0.43333"
|
||||
"1",""
|
||||
"1","
|
||||
|
||||
"
|
||||
"1","n="
|
||||
"1"," "
|
||||
"1","600"
|
||||
"1"," "
|
||||
"1","
|
||||
|
||||
"
|
||||
"1"," "
|
||||
"1"," CP"
|
||||
"1"," nsplit"
|
||||
"1"," rel error"
|
||||
"1"," xerror"
|
||||
"1"," xstd"
|
||||
"1","
|
||||
1"
|
||||
"1"," 0.692308"
|
||||
"1"," 0"
|
||||
"1"," 1.00000"
|
||||
"1"," 1.00000"
|
||||
"1"," 0.046685"
|
||||
"1","
|
||||
2"
|
||||
"1"," 0.025000"
|
||||
"1"," 1"
|
||||
"1"," 0.30769"
|
||||
"1"," 0.31154"
|
||||
"1"," 0.032194"
|
||||
"1","
|
||||
3"
|
||||
"1"," 0.011538"
|
||||
"1"," 3"
|
||||
"1"," 0.25769"
|
||||
"1"," 0.26538"
|
||||
"1"," 0.030055"
|
||||
"1","
|
||||
4"
|
||||
"1"," 0.010256"
|
||||
"1"," 5"
|
||||
"1"," 0.23462"
|
||||
"1"," 0.28846"
|
||||
"1"," 0.031157"
|
||||
"1","
|
||||
5"
|
||||
"1"," 0.010000"
|
||||
"1"," 11"
|
||||
"1"," 0.17308"
|
||||
"1"," 0.28462"
|
||||
"1"," 0.030978"
|
||||
"1","
|
||||
"
|
||||
"0","rpart.plot("
|
||||
"0"," tree_gini,"
|
||||
"0"," type = 1,"
|
||||
"0"," extra = 106,"
|
||||
"0"," fallen.leaves = TRUE,"
|
||||
"0",")"
|
||||
|
@ -0,0 +1 @@
|
||||
{"height":432.6328800988875,"width":700.0,"dpi":-1.0,"size_behavior":0,"conditions":[]}
|
||||
Binary file not shown.
|
After Width: | Height: | Size: 78 KiB |
Binary file not shown.
@ -0,0 +1,7 @@
|
||||
"0","pred_prob = predict(tree, train_df, type=""prob"")[,2]"
|
||||
"0",""
|
||||
"0","library(ROCR)"
|
||||
"0","pred = prediction(pred_prob, train_df$MYDEPV)"
|
||||
"0","perf = performance(pred, ""tpr"", ""fpr"")"
|
||||
"0",""
|
||||
"0","plot(perf)"
|
||||
|
@ -0,0 +1 @@
|
||||
"0","abline(a = 0, b = 1)"
|
||||
|
@ -0,0 +1 @@
|
||||
{"height":432.6328800988875,"width":700.0,"dpi":-1.0,"size_behavior":0,"conditions":[]}
|
||||
Binary file not shown.
|
After Width: | Height: | Size: 20 KiB |
Binary file not shown.
@ -0,0 +1,7 @@
|
||||
"0",""
|
||||
"0","auc_perf = performance(pred, measure = ""auc"")"
|
||||
"0","auc_perf@y.values[[1]]"
|
||||
"1","[1]"
|
||||
"1"," 0.9720645"
|
||||
"1","
|
||||
"
|
||||
|
@ -0,0 +1,5 @@
|
||||
"0","setwd('/home/sek1ro/git/public/lab/ds/25-1/r')"
|
||||
"0","survey <- read.csv('survey.csv')"
|
||||
"0",""
|
||||
"0","train_df = survey[1:600,]"
|
||||
"0","test_df = survey[601:750,]"
|
||||
|
1
5/data science/r/.Rproj.user/shared/notebooks/paths
Normal file
1
5/data science/r/.Rproj.user/shared/notebooks/paths
Normal file
@ -0,0 +1 @@
|
||||
/home/sek1ro/git/public/lab/ds/25-1/r/9.Rmd="EB7B11F9"
|
||||
111
5/data science/r/10.Rmd
Normal file
111
5/data science/r/10.Rmd
Normal file
@ -0,0 +1,111 @@
|
||||
---
|
||||
title: "Lab10: Time Series"
|
||||
author: "Vladislav Litvinov <vlad@sek1ro>"
|
||||
output:
|
||||
pdf_document:
|
||||
toc_float: TRUE
|
||||
---
|
||||
Plotting data set
|
||||
```{r}
|
||||
setwd('/home/sek1ro/git/public/lab/ds/25-1/r')
|
||||
jj = scan("jj.dat")
|
||||
jj_ts = ts(jj, start = c(1960, 1), frequency = 4)
|
||||
|
||||
jj_ts
|
||||
|
||||
plot(jj_ts, ylab = "EPS", xlab = "Year")
|
||||
```
|
||||
In order to perform an ARIMA model, the time series will need to be transformed to remove any trend. Plot the difference of xt and xt-1, for all t > 0. Has this difference adequately detrended the series? Does the variability of the EPS appear constant over time? Why does the constant variance matter?
|
||||
```{r}
|
||||
jj_diff = diff(jj_ts)
|
||||
|
||||
plot(jj_diff, xlab = "Year", ylab = "EPS diff")
|
||||
```
|
||||
Plot the log10 of the quarterly EPS vs. time and plot the difference of log10(xt ) and
|
||||
log10(xt-1) for all t > 0. Has this adequately detrended the series? Has the variability of the differenced log10(EPS) become more constant?
|
||||
```{r}
|
||||
log_jj = log10(jj_ts)
|
||||
log_jj_diff = diff(log_jj)
|
||||
|
||||
plot(log_jj, xlab = "Year", ylab = "log10(EPS)")
|
||||
plot(log_jj_diff, xlab = "Year", ylab = "log10(EPS) diff")
|
||||
```
|
||||
Treating the differenced log10 of the EPS series as a stationary series, plot the ACF and PACF of this series. What possible ARIMA models would you consider and why?
|
||||
|
||||
ACF(k) = Corr(x[t], x[t-k]) - Autocorrelation Function, показывает, насколько временной ряд коррелирует сам с собой
|
||||
|
||||
PACF - Partial Autocorrelation Function - оказывает чистую связь после удаления влияния всех промежуточных значений между t и t-k, это последний коэффициент в AR(k)-регрессии
|
||||
|
||||
xt = f1xt-1 + f2xt-2 + .. + fkxt-k + eps + f1xt-1 + f2xt-2 + .. + fkxt-k
|
||||
PACF(k) = fk
|
||||
|
||||
ARMA(p, q)
|
||||
p - AR-часть, предыдущие значения - PACF
|
||||
q - MA-часть, ошибки предыдущих предсказаний - ACF
|
||||
|
||||
ARIMA(p, d, q)
|
||||
d - I-часть, number of differences
|
||||
|
||||
```{r}
|
||||
acf(log_jj_diff, lag.max = 20)
|
||||
ar(log_jj_diff)
|
||||
pacf(log_jj_diff, lag.max = 20)
|
||||
```
|
||||
Run the proposed ARIMA models from part d and compare the results. Identify an appropriate model. Justify your choice.
|
||||
|
||||
Смысл: баланс между
|
||||
|
||||
Качеством подгонки (чем лучше модель описывает данные, тем ниже ошибка)
|
||||
|
||||
Сложностью модели (чем больше параметров, тем выше риск переобучения)
|
||||
|
||||
AIC=2k−2ln(L)
|
||||
L > , k <
|
||||
|
||||
Why is the choice of natural log or log base 10 in Problem 4.8 somewhat irrelevant to the transformation and the analysis?
|
||||
|
||||
Why is the value of the ACF for lag 0 equal to one?
|
||||
```{r}
|
||||
library(forecast)
|
||||
|
||||
fit_model = function(order) {
|
||||
Arima(log_jj_diff, order = order)
|
||||
}
|
||||
|
||||
models <- list(
|
||||
"1, 0, 1" = fit_model(c(1,0,1)),
|
||||
"1, 1, 1" = fit_model(c(1,1,1)),
|
||||
"1, 0, 5" = fit_model(c(1,0,5)),
|
||||
"1, 1, 5" = fit_model(c(1,1,5))
|
||||
)
|
||||
|
||||
print(models["1, 0, 5"])
|
||||
|
||||
aic_values <- sapply(models, AIC)
|
||||
print(aic_values)
|
||||
```
|
||||
Arima(1, 0, 5)
|
||||
```{r}
|
||||
n = 10000
|
||||
phi4 = c(-0.18)
|
||||
AR <- arima.sim(n=n, list(ar=phi4[1]))
|
||||
|
||||
plot(AR, main="AR series")
|
||||
acf(AR, main="ACF AR")
|
||||
pacf(AR, main="PACF AR")
|
||||
|
||||
theta4 <- c(-0.65, -0.22, -0.28, 1, -0.4)
|
||||
MA <- arima.sim(n=n, list(ma=theta4))
|
||||
|
||||
plot(MA, main="MA series")
|
||||
acf(MA, main="ACF MA")
|
||||
pacf(MA, main="PACF MA")
|
||||
```
|
||||
|
||||
```{r}
|
||||
fit <- auto.arima(jj_ts)
|
||||
summary(fit)
|
||||
|
||||
forecasted_values <- forecast(fit, h = 20)
|
||||
plot(forecasted_values)
|
||||
```
|
||||
BIN
5/data science/r/12.1/ex4.pdf
Normal file
BIN
5/data science/r/12.1/ex4.pdf
Normal file
Binary file not shown.
52
5/data science/r/12.1/mlclass-ex4/checkNNGradients.m
Normal file
52
5/data science/r/12.1/mlclass-ex4/checkNNGradients.m
Normal file
@ -0,0 +1,52 @@
|
||||
function checkNNGradients(lambda)
%CHECKNNGRADIENTS Creates a small neural network to check the
%backpropagation gradients
%   CHECKNNGRADIENTS(lambda) builds a tiny fixed-size network, computes the
%   analytical gradients via nnCostFunction (backprop) and the numerical
%   gradients via computeNumericalGradient, prints both side by side and
%   reports their relative difference. The two should be nearly identical.

% Default to an unregularised check when no lambda is supplied.
if ~exist('lambda', 'var') || isempty(lambda)
    lambda = 0;
end

% Small, fixed network dimensions so the check runs quickly.
in_size    = 3;
hid_size   = 5;
n_labels   = 3;
n_examples = 5;

% Deterministic 'random' weights and inputs (sin-based, reproducible).
Theta1 = debugInitializeWeights(hid_size, in_size);
Theta2 = debugInitializeWeights(n_labels, hid_size);
X = debugInitializeWeights(n_examples, in_size - 1);
y = 1 + mod(1:n_examples, n_labels)';

% Unroll all parameters into a single column vector.
nn_params = [Theta1(:) ; Theta2(:)];

% Cost as a function of the parameter vector only.
costFunc = @(p) nnCostFunction(p, in_size, hid_size, ...
                               n_labels, X, y, lambda);

[cost, grad] = costFunc(nn_params);
numgrad = computeNumericalGradient(costFunc, nn_params);

% Visually compare the two gradient computations column by column.
disp([numgrad grad]);
fprintf(['The above two columns you get should be very similar.\n' ...
         '(Left-Your Numerical Gradient, Right-Analytical Gradient)\n\n']);

% Relative difference; with EPSILON = 0.0001 in computeNumericalGradient.m
% a correct implementation gives a value below 1e-9.
diff = norm(numgrad-grad)/norm(numgrad+grad);

fprintf(['If your backpropagation implementation is correct, then \n' ...
         'the relative difference will be small (less than 1e-9). \n' ...
         '\nRelative Difference: %g\n'], diff);

end
|
||||
29
5/data science/r/12.1/mlclass-ex4/computeNumericalGradient.m
Normal file
29
5/data science/r/12.1/mlclass-ex4/computeNumericalGradient.m
Normal file
@ -0,0 +1,29 @@
|
||||
function numgrad = computeNumericalGradient(J, theta)
%COMPUTENUMERICALGRADIENT Computes the gradient using "finite differences"
%and gives us a numerical estimate of the gradient.
%   numgrad = COMPUTENUMERICALGRADIENT(J, theta) estimates the gradient of
%   J around theta with central differences: component k is
%   (J(theta + e_k*step) - J(theta - e_k*step)) / (2*step).
%   Calling y = J(theta) should return the function value at theta.

n = numel(theta);
numgrad = zeros(size(theta));
step = 1e-4;

for k = 1:n
    % Basis-vector perturbation of the k-th component only.
    delta = zeros(size(theta));
    delta(k) = step;
    % Central difference approximation of the partial derivative.
    numgrad(k) = (J(theta + delta) - J(theta - delta)) / (2 * step);
end

end
|
||||
22
5/data science/r/12.1/mlclass-ex4/debugInitializeWeights.m
Normal file
22
5/data science/r/12.1/mlclass-ex4/debugInitializeWeights.m
Normal file
@ -0,0 +1,22 @@
|
||||
function W = debugInitializeWeights(fan_out, fan_in)
%DEBUGINITIALIZEWEIGHTS Initialize the weights of a layer with fan_in
%incoming connections and fan_out outgoing connections using a fixed
%strategy, this will help you later in debugging
%   W = DEBUGINITIALIZEWEIGHTS(fan_out, fan_in) returns a matrix of size
%   (fan_out, 1 + fan_in); the extra first column corresponds to the
%   "bias" terms. Values are deterministic (sin-based), so every run
%   produces identical weights -- useful when debugging gradients.

% Fill column-major with sin(1), sin(2), ... scaled down to keep the
% weights small; same result as reshaping over a zeros template.
num_weights = fan_out * (1 + fan_in);
W = reshape(sin(1:num_weights), fan_out, 1 + fan_in) / 10;

end
|
||||
59
5/data science/r/12.1/mlclass-ex4/displayData.m
Normal file
59
5/data science/r/12.1/mlclass-ex4/displayData.m
Normal file
@ -0,0 +1,59 @@
|
||||
function [h, display_array] = displayData(X, example_width)
%DISPLAYDATA Display 2D data in a nice grid
%   [h, display_array] = DISPLAYDATA(X, example_width) displays 2D data
%   stored in X in a nice grid. It returns the figure handle h and the
%   displayed array if requested.
%   Each row of X is one example, reshaped into an
%   example_height x example_width patch.

% Set example_width automatically if not passed in
% (assumes roughly square patches when the caller gives no width)
if ~exist('example_width', 'var') || isempty(example_width)
    example_width = round(sqrt(size(X, 2)));
end

% Gray Image
colormap(gray);

% Compute rows, cols
[m n] = size(X);
example_height = (n / example_width);

% Compute number of items to display
display_rows = floor(sqrt(m));
display_cols = ceil(m / display_rows);

% Between images padding
pad = 1;

% Setup blank display (-1 background renders as a dark border between patches)
display_array = - ones(pad + display_rows * (example_height + pad), ...
                       pad + display_cols * (example_width + pad));

% Copy each example into a patch on the display array
curr_ex = 1;
for j = 1:display_rows
    for i = 1:display_cols
        if curr_ex > m,
            break;
        end
        % Copy the patch

        % Get the max value of the patch
        % (per-example normalisation so every patch uses the full
        % [-1, 1] contrast range)
        max_val = max(abs(X(curr_ex, :)));
        display_array(pad + (j - 1) * (example_height + pad) + (1:example_height), ...
                      pad + (i - 1) * (example_width + pad) + (1:example_width)) = ...
                        reshape(X(curr_ex, :), example_height, example_width) / max_val;
        curr_ex = curr_ex + 1;
    end
    if curr_ex > m,
        break;
    end
end

% Display Image
h = imagesc(display_array, [-1 1]);

% Do not show axis
axis image off

drawnow;

end
|
||||
234
5/data science/r/12.1/mlclass-ex4/ex4.m
Normal file
234
5/data science/r/12.1/mlclass-ex4/ex4.m
Normal file
@ -0,0 +1,234 @@
|
||||
%% Machine Learning Online Class - Exercise 4 Neural Network Learning

%  Instructions
%  ------------
%
%  This file contains code that helps you get started on the
%  linear exercise. You will need to complete the following functions
%  in this exercise:
%
%     sigmoidGradient.m
%     randInitializeWeights.m
%     nnCostFunction.m
%
%  For this exercise, you will not need to change any code in this file,
%  or any other files other than those mentioned above.
%

%% Initialization
clear ; close all; clc

%% Setup the parameters you will use for this exercise
input_layer_size  = 400;  % 20x20 Input Images of Digits
hidden_layer_size = 25;   % 25 hidden units
num_labels = 10;          % 10 labels, from 1 to 10
                          % (note that we have mapped "0" to label 10)

%% =========== Part 1: Loading and Visualizing Data =============
%  We start the exercise by first loading and visualizing the dataset.
%  You will be working with a dataset that contains handwritten digits.
%

% Load Training Data
fprintf('Loading and Visualizing Data ...\n')

load('ex4data1.mat');
m = size(X, 1);

% Randomly select 100 data points to display
sel = randperm(size(X, 1));
sel = sel(1:100);

displayData(X(sel, :));

fprintf('Program paused. Press enter to continue.\n');
pause;


%% ================ Part 2: Loading Parameters ================
% In this part of the exercise, we load some pre-initialized
% neural network parameters.

fprintf('\nLoading Saved Neural Network Parameters ...\n')

% Load the weights into variables Theta1 and Theta2
load('ex4weights.mat');

% Unroll parameters
nn_params = [Theta1(:) ; Theta2(:)];

%% ================ Part 3: Compute Cost (Feedforward) ================
%  To the neural network, you should first start by implementing the
%  feedforward part of the neural network that returns the cost only. You
%  should complete the code in nnCostFunction.m to return cost. After
%  implementing the feedforward to compute the cost, you can verify that
%  your implementation is correct by verifying that you get the same cost
%  as us for the fixed debugging parameters.
%
%  We suggest implementing the feedforward cost *without* regularization
%  first so that it will be easier for you to debug. Later, in part 4, you
%  will get to implement the regularized cost.
%
fprintf('\nFeedforward Using Neural Network ...\n')

% Weight regularization parameter (we set this to 0 here).
lambda = 0;

J = nnCostFunction(nn_params, input_layer_size, hidden_layer_size, ...
                   num_labels, X, y, lambda);

fprintf(['Cost at parameters (loaded from ex4weights): %f '...
         '\n(this value should be about 0.287629)\n'], J);

fprintf('\nProgram paused. Press enter to continue.\n');
pause;

%% =============== Part 4: Implement Regularization ===============
%  Once your cost function implementation is correct, you should now
%  continue to implement the regularization with the cost.
%

fprintf('\nChecking Cost Function (w/ Regularization) ... \n')

% Weight regularization parameter (we set this to 1 here).
lambda = 1;

J = nnCostFunction(nn_params, input_layer_size, hidden_layer_size, ...
                   num_labels, X, y, lambda);

fprintf(['Cost at parameters (loaded from ex4weights): %f '...
         '\n(this value should be about 0.383770)\n'], J);

fprintf('Program paused. Press enter to continue.\n');
pause;


%% ================ Part 5: Sigmoid Gradient  ================
%  Before you start implementing the neural network, you will first
%  implement the gradient for the sigmoid function. You should complete the
%  code in the sigmoidGradient.m file.
%

fprintf('\nEvaluating sigmoid gradient...\n')

g = sigmoidGradient([1 -0.5 0 0.5 1]);
fprintf('Sigmoid gradient evaluated at [1 -0.5 0 0.5 1]:\n  ');
fprintf('%f ', g);
fprintf('\n\n');

fprintf('Program paused. Press enter to continue.\n');
pause;


%% ================ Part 6: Initializing Parameters ================
%  In this part of the exercise, you will be starting to implement a two
%  layer neural network that classifies digits. You will start by
%  implementing a function to initialize the weights of the neural network
%  (randInitializeWeights.m)

fprintf('\nInitializing Neural Network Parameters ...\n')

initial_Theta1 = randInitializeWeights(input_layer_size, hidden_layer_size);
initial_Theta2 = randInitializeWeights(hidden_layer_size, num_labels);

% Unroll parameters
initial_nn_params = [initial_Theta1(:) ; initial_Theta2(:)];


%% =============== Part 7: Implement Backpropagation ===============
%  Once your cost matches up with ours, you should proceed to implement the
%  backpropagation algorithm for the neural network. You should add to the
%  code you've written in nnCostFunction.m to return the partial
%  derivatives of the parameters.
%
fprintf('\nChecking Backpropagation... \n');

%  Check gradients by running checkNNGradients
checkNNGradients;

fprintf('\nProgram paused. Press enter to continue.\n');
pause;


%% =============== Part 8: Implement Regularization ===============
%  Once your backpropagation implementation is correct, you should now
%  continue to implement the regularization with the cost and gradient.
%

fprintf('\nChecking Backpropagation (w/ Regularization) ... \n')

%  Check gradients by running checkNNGradients
lambda = 3;
checkNNGradients(lambda);

% Also output the costFunction debugging values
debug_J  = nnCostFunction(nn_params, input_layer_size, ...
                          hidden_layer_size, num_labels, X, y, lambda);

% BUG FIX: the message previously said "lambda = 10" although lambda is 3
% here (and 0.576051 is the expected cost for lambda = 3).
fprintf(['\n\nCost at (fixed) debugging parameters (w/ lambda = 3): %f ' ...
         '\n(this value should be about 0.576051)\n\n'], debug_J);

fprintf('Program paused. Press enter to continue.\n');
pause;


%% =================== Part 9: Training NN ===================
%  (Renumbered: this section was a duplicate "Part 8".)
%  You have now implemented all the code necessary to train a neural
%  network. To train your neural network, we will now use "fmincg", which
%  is a function which works similarly to "fminunc". Recall that these
%  advanced optimizers are able to train our cost functions efficiently as
%  long as we provide them with the gradient computations.
%
fprintf('\nTraining Neural Network... \n')

%  After you have completed the assignment, change the MaxIter to a larger
%  value to see how more training helps.
options = optimset('MaxIter', 50);

%  You should also try different values of lambda
lambda = 1;

% Create "short hand" for the cost function to be minimized
costFunction = @(p) nnCostFunction(p, ...
                                   input_layer_size, ...
                                   hidden_layer_size, ...
                                   num_labels, X, y, lambda);

% Now, costFunction is a function that takes in only one argument (the
% neural network parameters)
[nn_params, cost] = fmincg(costFunction, initial_nn_params, options);

% Obtain Theta1 and Theta2 back from nn_params
Theta1 = reshape(nn_params(1:hidden_layer_size * (input_layer_size + 1)), ...
                 hidden_layer_size, (input_layer_size + 1));

Theta2 = reshape(nn_params((1 + (hidden_layer_size * (input_layer_size + 1))):end), ...
                 num_labels, (hidden_layer_size + 1));

fprintf('Program paused. Press enter to continue.\n');
pause;


%% ================= Part 10: Visualize Weights =================
%  You can now "visualize" what the neural network is learning by
%  displaying the hidden units to see what features they are capturing in
%  the data.

fprintf('\nVisualizing Neural Network... \n')

displayData(Theta1(:, 2:end));

fprintf('\nProgram paused. Press enter to continue.\n');
pause;

%% ================= Part 11: Implement Predict =================
%  After training the neural network, we would like to use it to predict
%  the labels. You will now implement the "predict" function to use the
%  neural network to predict the labels of the training set. This lets
%  you compute the training set accuracy.

pred = predict(Theta1, Theta2, X);

fprintf('\nTraining Set Accuracy: %f\n', mean(double(pred == y)) * 100);
|
||||
|
||||
|
||||
BIN
5/data science/r/12.1/mlclass-ex4/ex4data1.mat
Normal file
BIN
5/data science/r/12.1/mlclass-ex4/ex4data1.mat
Normal file
Binary file not shown.
BIN
5/data science/r/12.1/mlclass-ex4/ex4weights.mat
Normal file
BIN
5/data science/r/12.1/mlclass-ex4/ex4weights.mat
Normal file
Binary file not shown.
175
5/data science/r/12.1/mlclass-ex4/fmincg.m
Normal file
175
5/data science/r/12.1/mlclass-ex4/fmincg.m
Normal file
@ -0,0 +1,175 @@
|
||||
function [X, fX, i] = fmincg(f, X, options, P1, P2, P3, P4, P5)
% Minimize a continuous differentialble multivariate function. Starting point
% is given by "X" (D by 1), and the function named in the string "f", must
% return a function value and a vector of partial derivatives. The Polack-
% Ribiere flavour of conjugate gradients is used to compute search directions,
% and a line search using quadratic and cubic polynomial approximations and the
% Wolfe-Powell stopping criteria is used together with the slope ratio method
% for guessing initial step sizes. Additionally a bunch of checks are made to
% make sure that exploration is taking place and that extrapolation will not
% be unboundedly large. The "length" gives the length of the run: if it is
% positive, it gives the maximum number of line searches, if negative its
% absolute gives the maximum allowed number of function evaluations. You can
% (optionally) give "length" a second component, which will indicate the
% reduction in function value to be expected in the first line-search (defaults
% to 1.0). The function returns when either its length is up, or if no further
% progress can be made (ie, we are at a minimum, or so close that due to
% numerical problems, we cannot get any closer). If the function terminates
% within a few iterations, it could be an indication that the function value
% and derivatives are not consistent (ie, there may be a bug in the
% implementation of your "f" function). The function returns the found
% solution "X", a vector of function values "fX" indicating the progress made
% and "i" the number of iterations (line searches or function evaluations,
% depending on the sign of "length") used.
%
% Usage: [X, fX, i] = fmincg(f, X, options, P1, P2, P3, P4, P5)
%
% See also: checkgrad
%
% Copyright (C) 2001 and 2002 by Carl Edward Rasmussen. Date 2002-02-13
%
%
% (C) Copyright 1999, 2000 & 2001, Carl Edward Rasmussen
%
% Permission is granted for anyone to copy, use, or modify these
% programs and accompanying documents for purposes of research or
% education, provided this copyright notice is retained, and note is
% made of any changes that have been made.
%
% These programs and documents are distributed without any warranty,
% express or implied. As the programs were written for research
% purposes only, they have not been tested to the degree that would be
% advisable in any important application. All use of these programs is
% entirely at the user's own risk.
%
% [ml-class] Changes Made:
% 1) Function name and argument specifications
% 2) Output display
%

% Read options
% NOTE(review): "length" deliberately shadows the builtin of the same name
% inside this function; here it holds the run length, not a vector length.
if exist('options', 'var') && ~isempty(options) && isfield(options, 'MaxIter')
    length = options.MaxIter;
else
    length = 100;
end

RHO = 0.01;                            % a bunch of constants for line searches
SIG = 0.5;       % RHO and SIG are the constants in the Wolfe-Powell conditions
INT = 0.1;    % don't reevaluate within 0.1 of the limit of the current bracket
EXT = 3.0;                    % extrapolate maximum 3 times the current bracket
MAX = 20;                         % max 20 function evaluations per line search
RATIO = 100;                                      % maximum allowed slope ratio

% Build the call string "feval(f, X, P1, ..., Pk)" for however many extra
% pass-through arguments the caller supplied.
argstr = ['feval(f, X'];                      % compose string used to call function
for i = 1:(nargin - 3)
  argstr = [argstr, ',P', int2str(i)];
end
argstr = [argstr, ')'];

if max(size(length)) == 2, red=length(2); length=length(1); else red=1; end
S=['Iteration '];

i = 0;                                            % zero the run length counter
ls_failed = 0;                             % no previous line search has failed
fX = [];
[f1 df1] = eval(argstr);                      % get function value and gradient
i = i + (length<0);                                            % count epochs?!
s = -df1;                                        % search direction is steepest
d1 = -s'*s;                                                 % this is the slope
z1 = red/(1-d1);                                  % initial step is red/(|s|+1)

while i < abs(length)                                      % while not finished
  i = i + (length>0);                                      % count iterations?!

  X0 = X; f0 = f1; df0 = df1;                   % make a copy of current values
  X = X + z1*s;                                             % begin line search
  [f2 df2] = eval(argstr);
  i = i + (length<0);                                          % count epochs?!
  d2 = df2'*s;
  f3 = f1; d3 = d1; z3 = -z1;             % initialize point 3 equal to point 1
  if length>0, M = MAX; else M = min(MAX, -length-i); end
  success = 0; limit = -1;                                % initialize quanteties
  while 1
    % Bracketing phase: shrink the step until the Wolfe-Powell conditions
    % hold or the evaluation budget M runs out.
    while ((f2 > f1+z1*RHO*d1) | (d2 > -SIG*d1)) & (M > 0)
      limit = z1;                                         % tighten the bracket
      if f2 > f1
        z2 = z3 - (0.5*d3*z3*z3)/(d3*z3+f2-f3);                 % quadratic fit
      else
        A = 6*(f2-f3)/z3+3*(d2+d3);                                 % cubic fit
        B = 3*(f3-f2)-z3*(d3+2*d2);
        z2 = (sqrt(B*B-A*d2*z3*z3)-B)/A;       % numerical error possible - ok!
      end
      if isnan(z2) | isinf(z2)
        z2 = z3/2;                  % if we had a numerical problem then bisect
      end
      z2 = max(min(z2, INT*z3),(1-INT)*z3);  % don't accept too close to limits
      z1 = z1 + z2;                                           % update the step
      X = X + z2*s;
      [f2 df2] = eval(argstr);
      M = M - 1; i = i + (length<0);                           % count epochs?!
      d2 = df2'*s;
      z3 = z3-z2;                    % z3 is now relative to the location of z2
    end
    if f2 > f1+z1*RHO*d1 | d2 > -SIG*d1
      break;                                                % this is a failure
    elseif d2 > SIG*d1
      success = 1; break;                                             % success
    elseif M == 0
      break;                                                          % failure
    end
    A = 6*(f2-f3)/z3+3*(d2+d3);                      % make cubic extrapolation
    B = 3*(f3-f2)-z3*(d3+2*d2);
    z2 = -d2*z3*z3/(B+sqrt(B*B-A*d2*z3*z3));        % num. error possible - ok!
    if ~isreal(z2) | isnan(z2) | isinf(z2) | z2 < 0   % num prob or wrong sign?
      if limit < -0.5                               % if we have no upper limit
        z2 = z1 * (EXT-1);                 % the extrapolate the maximum amount
      else
        z2 = (limit-z1)/2;                                   % otherwise bisect
      end
    elseif (limit > -0.5) & (z2+z1 > limit)          % extraplation beyond max?
      z2 = (limit-z1)/2;                                               % bisect
    elseif (limit < -0.5) & (z2+z1 > z1*EXT)       % extrapolation beyond limit
      z2 = z1*(EXT-1.0);                           % set to extrapolation limit
    elseif z2 < -z3*INT
      z2 = -z3*INT;
    elseif (limit > -0.5) & (z2 < (limit-z1)*(1.0-INT))   % too close to limit?
      z2 = (limit-z1)*(1.0-INT);
    end
    f3 = f2; d3 = d2; z3 = -z2;                  % set point 3 equal to point 2
    z1 = z1 + z2; X = X + z2*s;                      % update current estimates
    [f2 df2] = eval(argstr);
    M = M - 1; i = i + (length<0);                             % count epochs?!
    d2 = df2'*s;
  end                                                      % end of line search

  if success                                         % if line search succeeded
    f1 = f2; fX = [fX' f1]';
    fprintf('%s %4i | Cost: %4.6e\r', S, i, f1);
    s = (df2'*df2-df1'*df2)/(df1'*df1)*s - df2;      % Polack-Ribiere direction
    tmp = df1; df1 = df2; df2 = tmp;                         % swap derivatives
    d2 = df1'*s;
    if d2 > 0                                      % new slope must be negative
      s = -df1;                              % otherwise use steepest direction
      d2 = -s'*s;
    end
    z1 = z1 * min(RATIO, d1/(d2-realmin));          % slope ratio but max RATIO
    d1 = d2;
    ls_failed = 0;                              % this line search did not fail
  else
    X = X0; f1 = f0; df1 = df0;  % restore point from before failed line search
    if ls_failed | i > abs(length)          % line search failed twice in a row
      break;                             % or we ran out of time, so we give up
    end
    tmp = df1; df1 = df2; df2 = tmp;                         % swap derivatives
    s = -df1;                                                    % try steepest
    d1 = -s'*s;
    z1 = 1/(1-d1);
    ls_failed = 1;                                    % this line search failed
  end
  if exist('OCTAVE_VERSION')
    fflush(stdout);
  end
end
fprintf('\n');
|
||||
135
5/data science/r/12.1/mlclass-ex4/nnCostFunction.m
Normal file
135
5/data science/r/12.1/mlclass-ex4/nnCostFunction.m
Normal file
@ -0,0 +1,135 @@
|
||||
function [J grad] = nnCostFunction(nn_params, ...
                                   input_layer_size, ...
                                   hidden_layer_size, ...
                                   num_labels, ...
                                   X, y, lambda)
%NNCOSTFUNCTION Implements the neural network cost function for a two layer
%neural network which performs classification
%   [J grad] = NNCOSTFUNCTON(nn_params, hidden_layer_size, num_labels, ...
%   X, y, lambda) computes the cost and gradient of the neural network. The
%   parameters for the neural network are "unrolled" into the vector
%   nn_params and need to be converted back into the weight matrices.
%
%   The returned parameter grad should be a "unrolled" vector of the
%   partial derivatives of the neural network.
%
%   y contains labels 1..num_labels; X is m x input_layer_size.

% Reshape nn_params back into the parameters Theta1 and Theta2, the weight
% matrices for our 2 layer neural network.
Theta1 = reshape(nn_params(1:hidden_layer_size * (input_layer_size + 1)), ...
                 hidden_layer_size, (input_layer_size + 1));

Theta2 = reshape(nn_params((1 + (hidden_layer_size * (input_layer_size + 1))):end), ...
                 num_labels, (hidden_layer_size + 1));

% Number of training examples.
m = size(X, 1);

% ---------------- Feedforward (vectorized, for the cost) ----------------
X = [ones(m, 1) X];                 % prepend bias column

z2 = X * Theta1';
a2 = [ones(m, 1) sigmoid(z2)];      % hidden activations, with bias

z3 = a2 * Theta2';
a3 = sigmoid(z3);                   % output activations

% One-hot encode the labels.
% FIX: eye(num_labels)(y, :) is Octave-only chained indexing; splitting it
% into two statements also works in MATLAB.
identity = eye(num_labels);
y_matrix = identity(y, :);

% Cross-entropy cost: outer sum over the m examples, inner sum over the
% K output classes.
J = (1/m) * sum(sum(-y_matrix .* log(a3) - (1 - y_matrix) .* log(1 - a3)));

% Regularization term; the bias columns (first column of each Theta) are
% excluded by convention.
reg_term = (lambda/(2*m)) * (sum(sum(Theta1(:, 2:end).^2)) + sum(sum(Theta2(:, 2:end).^2)));
J = J + reg_term;

% ---------------- Backpropagation (loop over examples) ----------------
Delta1 = zeros(size(Theta1));
Delta2 = zeros(size(Theta2));

for t = 1:m
    % Feedforward for example t (X already carries the bias column).
    % Per-example locals are suffixed with "t" so they do not shadow the
    % vectorized z2/a2/z3/a3 above.
    a1t = X(t, :)';
    z2t = Theta1 * a1t;
    a2t = [1; sigmoid(z2t)];

    z3t = Theta2 * a2t;
    a3t = sigmoid(z3t);

    % Output-layer error: difference between the network's activation and
    % the true target value.
    delta3 = a3t - y_matrix(t, :)';

    % Hidden-layer error; the bias component is dropped because no error
    % propagates back to the constant 1.
    delta2 = (Theta2' * delta3) .* [1; sigmoidGradient(z2t)];
    delta2 = delta2(2:end);

    % Accumulate how much each node was "responsible" for the output error.
    Delta1 = Delta1 + delta2 * a1t';
    Delta2 = Delta2 + delta3 * a2t';
end

% Divide the accumulated gradients by m to obtain the gradients for the
% neural network cost function.
Theta1_grad = Delta1 / m;
Theta2_grad = Delta2 / m;

% Regularize the gradients, again skipping the bias columns.
Theta1_grad(:, 2:end) = Theta1_grad(:, 2:end) + (lambda/m) * Theta1(:, 2:end);
Theta2_grad(:, 2:end) = Theta2_grad(:, 2:end) + (lambda/m) * Theta2(:, 2:end);

% Unroll gradients
grad = [Theta1_grad(:) ; Theta2_grad(:)];

end
|
||||
BIN
5/data science/r/12.1/mlclass-ex4/octave-workspace
Normal file
BIN
5/data science/r/12.1/mlclass-ex4/octave-workspace
Normal file
Binary file not shown.
20
5/data science/r/12.1/mlclass-ex4/predict.m
Normal file
20
5/data science/r/12.1/mlclass-ex4/predict.m
Normal file
@ -0,0 +1,20 @@
|
||||
function p = predict(Theta1, Theta2, X)
%PREDICT Predict the label of an input given a trained neural network
%   p = PREDICT(Theta1, Theta2, X) outputs the predicted label of X given the
%   trained weights of a neural network (Theta1, Theta2)
%
%   X is m x n (one example per row); p is an m x 1 vector of labels in
%   1..size(Theta2, 1).

% Number of examples.
% (Dead code removed: an unused num_labels local and a p preallocation that
% was immediately overwritten by the max() call below.)
m = size(X, 1);

% Forward pass: prepend a bias column before each layer.
h1 = sigmoid([ones(m, 1) X] * Theta1');
h2 = sigmoid([ones(m, 1) h1] * Theta2');

% Predicted label = index of the most activated output unit in each row.
[dummy, p] = max(h2, [], 2);

end
|
||||
33
5/data science/r/12.1/mlclass-ex4/randInitializeWeights.m
Normal file
33
5/data science/r/12.1/mlclass-ex4/randInitializeWeights.m
Normal file
@ -0,0 +1,33 @@
|
||||
function W = randInitializeWeights(L_in, L_out)
%RANDINITIALIZEWEIGHTS Randomly initialize the weights of a layer with L_in
%incoming connections and L_out outgoing connections
%   W = RANDINITIALIZEWEIGHTS(L_in, L_out) randomly initializes the weights
%   of a layer with L_in incoming connections and L_out outgoing
%   connections.
%
%   W is L_out x (1 + L_in); the first column corresponds to the "bias"
%   terms. Values are drawn uniformly from [-epsilon_init, epsilon_init]
%   so that symmetry between units is broken while the initial weights
%   stay small.
%   (Dead code removed: a zeros() preallocation that was immediately
%   overwritten, plus exercise scaffolding comments.)

epsilon_init = 0.12;
W = rand(L_out, 1 + L_in) * 2 * epsilon_init - epsilon_init;

end
|
||||
6
5/data science/r/12.1/mlclass-ex4/sigmoid.m
Normal file
6
5/data science/r/12.1/mlclass-ex4/sigmoid.m
Normal file
@ -0,0 +1,6 @@
|
||||
function g = sigmoid(z)
|
||||
%SIGMOID Compute sigmoid functoon
|
||||
% J = SIGMOID(z) computes the sigmoid of z.
|
||||
|
||||
g = 1.0 ./ (1.0 + exp(-z));
|
||||
end
|
||||
33
5/data science/r/12.1/mlclass-ex4/sigmoidGradient.m
Normal file
33
5/data science/r/12.1/mlclass-ex4/sigmoidGradient.m
Normal file
@ -0,0 +1,33 @@
|
||||
function g = sigmoidGradient(z)
|
||||
%SIGMOIDGRADIENT returns the gradient of the sigmoid function
|
||||
%evaluated at z
|
||||
% g = SIGMOIDGRADIENT(z) computes the gradient of the sigmoid function
|
||||
% evaluated at z. This should work regardless if z is a matrix or a
|
||||
% vector. In particular, if z is a vector or matrix, you should return
|
||||
% the gradient for each element.
|
||||
|
||||
g = zeros(size(z));
|
||||
|
||||
% ====================== YOUR CODE HERE ======================
|
||||
% Instructions: Compute the gradient of the sigmoid function evaluated at
|
||||
% each value of z (z can be a matrix, vector or scalar).
|
||||
|
||||
|
||||
|
||||
g = sigmoid(z) .* (1 - sigmoid(z));
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
% =============================================================
|
||||
|
||||
|
||||
|
||||
|
||||
end
|
||||
337
5/data science/r/12.1/mlclass-ex4/submit.m
Normal file
337
5/data science/r/12.1/mlclass-ex4/submit.m
Normal file
@ -0,0 +1,337 @@
|
||||
function submit(partId)
|
||||
%SUBMIT Submit your code and output to the ml-class servers
|
||||
% SUBMIT() will connect to the ml-class server and submit your solution
|
||||
|
||||
fprintf('==\n== [ml-class] Submitting Solutions | Programming Exercise %s\n==\n', ...
|
||||
homework_id());
|
||||
if ~exist('partId', 'var') || isempty(partId)
|
||||
partId = promptPart();
|
||||
end
|
||||
|
||||
% Check valid partId
|
||||
partNames = validParts();
|
||||
if ~isValidPartId(partId)
|
||||
fprintf('!! Invalid homework part selected.\n');
|
||||
fprintf('!! Expected an integer from 1 to %d.\n', numel(partNames) + 1);
|
||||
fprintf('!! Submission Cancelled\n');
|
||||
return
|
||||
end
|
||||
|
||||
[login password] = loginPrompt();
|
||||
if isempty(login)
|
||||
fprintf('!! Submission Cancelled\n');
|
||||
return
|
||||
end
|
||||
|
||||
fprintf('\n== Connecting to ml-class ... ');
|
||||
if exist('OCTAVE_VERSION')
|
||||
fflush(stdout);
|
||||
end
|
||||
|
||||
% Setup submit list
|
||||
if partId == numel(partNames) + 1
|
||||
submitParts = 1:numel(partNames);
|
||||
else
|
||||
submitParts = [partId];
|
||||
end
|
||||
|
||||
for s = 1:numel(submitParts)
|
||||
% Submit this part
|
||||
partId = submitParts(s);
|
||||
|
||||
% Get Challenge
|
||||
[login, ch, signature] = getChallenge(login);
|
||||
if isempty(login) || isempty(ch) || isempty(signature)
|
||||
% Some error occured, error string in first return element.
|
||||
fprintf('\n!! Error: %s\n\n', login);
|
||||
return
|
||||
end
|
||||
|
||||
% Attempt Submission with Challenge
|
||||
ch_resp = challengeResponse(login, password, ch);
|
||||
[result, str] = submitSolution(login, ch_resp, partId, output(partId), ...
|
||||
source(partId), signature);
|
||||
|
||||
fprintf('\n== [ml-class] Submitted Homework %s - Part %d - %s\n', ...
|
||||
homework_id(), partId, partNames{partId});
|
||||
fprintf('== %s\n', strtrim(str));
|
||||
if exist('OCTAVE_VERSION')
|
||||
fflush(stdout);
|
||||
end
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
% ================== CONFIGURABLES FOR EACH HOMEWORK ==================
|
||||
|
||||
function id = homework_id()
|
||||
id = '4';
|
||||
end
|
||||
|
||||
function [partNames] = validParts()
|
||||
partNames = { 'Feedforward and Cost Function', ...
|
||||
'Regularized Cost Function', ...
|
||||
'Sigmoid Gradient', ...
|
||||
'Neural Network Gradient (Backpropagation)' ...
|
||||
'Regularized Gradient' ...
|
||||
};
|
||||
end
|
||||
|
||||
function srcs = sources()
|
||||
% Separated by part
|
||||
srcs = { { 'nnCostFunction.m' }, ...
|
||||
{ 'nnCostFunction.m' }, ...
|
||||
{ 'sigmoidGradient.m' }, ...
|
||||
{ 'nnCostFunction.m' }, ...
|
||||
{ 'nnCostFunction.m' } };
|
||||
end
|
||||
|
||||
function out = output(partId)
|
||||
% Random Test Cases
|
||||
X = reshape(3 * sin(1:1:30), 3, 10);
|
||||
Xm = reshape(sin(1:32), 16, 2) / 5;
|
||||
ym = 1 + mod(1:16,4)';
|
||||
t1 = sin(reshape(1:2:24, 4, 3));
|
||||
t2 = cos(reshape(1:2:40, 4, 5));
|
||||
t = [t1(:) ; t2(:)];
|
||||
if partId == 1
|
||||
[J] = nnCostFunction(t, 2, 4, 4, Xm, ym, 0);
|
||||
out = sprintf('%0.5f ', J);
|
||||
elseif partId == 2
|
||||
[J] = nnCostFunction(t, 2, 4, 4, Xm, ym, 1.5);
|
||||
out = sprintf('%0.5f ', J);
|
||||
elseif partId == 3
|
||||
out = sprintf('%0.5f ', sigmoidGradient(X));
|
||||
elseif partId == 4
|
||||
[J, grad] = nnCostFunction(t, 2, 4, 4, Xm, ym, 0);
|
||||
out = sprintf('%0.5f ', J);
|
||||
out = [out sprintf('%0.5f ', grad)];
|
||||
elseif partId == 5
|
||||
[J, grad] = nnCostFunction(t, 2, 4, 4, Xm, ym, 1.5);
|
||||
out = sprintf('%0.5f ', J);
|
||||
out = [out sprintf('%0.5f ', grad)];
|
||||
end
|
||||
end
|
||||
|
||||
function url = challenge_url()
|
||||
url = 'http://www.ml-class.org/course/homework/challenge';
|
||||
end
|
||||
|
||||
function url = submit_url()
|
||||
url = 'http://www.ml-class.org/course/homework/submit';
|
||||
end
|
||||
|
||||
% ========================= CHALLENGE HELPERS =========================
|
||||
|
||||
function src = source(partId)
|
||||
src = '';
|
||||
src_files = sources();
|
||||
if partId <= numel(src_files)
|
||||
flist = src_files{partId};
|
||||
for i = 1:numel(flist)
|
||||
fid = fopen(flist{i});
|
||||
while ~feof(fid)
|
||||
line = fgets(fid);
|
||||
src = [src line];
|
||||
end
|
||||
fclose(fid);
|
||||
src = [src '||||||||'];
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
function ret = isValidPartId(partId)
|
||||
partNames = validParts();
|
||||
ret = (~isempty(partId)) && (partId >= 1) && (partId <= numel(partNames) + 1);
|
||||
end
|
||||
|
||||
function partId = promptPart()
|
||||
fprintf('== Select which part(s) to submit:\n', ...
|
||||
homework_id());
|
||||
partNames = validParts();
|
||||
srcFiles = sources();
|
||||
for i = 1:numel(partNames)
|
||||
fprintf('== %d) %s [', i, partNames{i});
|
||||
fprintf(' %s ', srcFiles{i}{:});
|
||||
fprintf(']\n');
|
||||
end
|
||||
fprintf('== %d) All of the above \n==\nEnter your choice [1-%d]: ', ...
|
||||
numel(partNames) + 1, numel(partNames) + 1);
|
||||
selPart = input('', 's');
|
||||
partId = str2num(selPart);
|
||||
if ~isValidPartId(partId)
|
||||
partId = -1;
|
||||
end
|
||||
end
|
||||
|
||||
function [email,ch,signature] = getChallenge(email)
|
||||
str = urlread(challenge_url(), 'post', {'email_address', email});
|
||||
|
||||
str = strtrim(str);
|
||||
[email, str] = strtok (str, '|');
|
||||
[ch, str] = strtok (str, '|');
|
||||
[signature, str] = strtok (str, '|');
|
||||
end
|
||||
|
||||
|
||||
function [result, str] = submitSolution(email, ch_resp, part, output, ...
|
||||
source, signature)
|
||||
|
||||
params = {'homework', homework_id(), ...
|
||||
'part', num2str(part), ...
|
||||
'email', email, ...
|
||||
'output', output, ...
|
||||
'source', source, ...
|
||||
'challenge_response', ch_resp, ...
|
||||
'signature', signature};
|
||||
|
||||
str = urlread(submit_url(), 'post', params);
|
||||
|
||||
% Parse str to read for success / failure
|
||||
result = 0;
|
||||
|
||||
end
|
||||
|
||||
% =========================== LOGIN HELPERS ===========================
|
||||
|
||||
function [login password] = loginPrompt()
|
||||
% Prompt for password
|
||||
[login password] = basicPrompt();
|
||||
|
||||
if isempty(login) || isempty(password)
|
||||
login = []; password = [];
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
function [login password] = basicPrompt()
|
||||
login = input('Login (Email address): ', 's');
|
||||
password = input('Password: ', 's');
|
||||
end
|
||||
|
||||
|
||||
function [str] = challengeResponse(email, passwd, challenge)
|
||||
salt = ')~/|]QMB3[!W`?OVt7qC"@+}';
|
||||
str = sha1([challenge sha1([salt email passwd])]);
|
||||
sel = randperm(numel(str));
|
||||
sel = sort(sel(1:16));
|
||||
str = str(sel);
|
||||
end
|
||||
|
||||
|
||||
% =============================== SHA-1 ================================
|
||||
|
||||
function hash = sha1(str)
|
||||
|
||||
% Initialize variables
|
||||
h0 = uint32(1732584193);
|
||||
h1 = uint32(4023233417);
|
||||
h2 = uint32(2562383102);
|
||||
h3 = uint32(271733878);
|
||||
h4 = uint32(3285377520);
|
||||
|
||||
% Convert to word array
|
||||
strlen = numel(str);
|
||||
|
||||
% Break string into chars and append the bit 1 to the message
|
||||
mC = [double(str) 128];
|
||||
mC = [mC zeros(1, 4-mod(numel(mC), 4), 'uint8')];
|
||||
|
||||
numB = strlen * 8;
|
||||
if exist('idivide')
|
||||
numC = idivide(uint32(numB + 65), 512, 'ceil');
|
||||
else
|
||||
numC = ceil(double(numB + 65)/512);
|
||||
end
|
||||
numW = numC * 16;
|
||||
mW = zeros(numW, 1, 'uint32');
|
||||
|
||||
idx = 1;
|
||||
for i = 1:4:strlen + 1
|
||||
mW(idx) = bitor(bitor(bitor( ...
|
||||
bitshift(uint32(mC(i)), 24), ...
|
||||
bitshift(uint32(mC(i+1)), 16)), ...
|
||||
bitshift(uint32(mC(i+2)), 8)), ...
|
||||
uint32(mC(i+3)));
|
||||
idx = idx + 1;
|
||||
end
|
||||
|
||||
% Append length of message
|
||||
mW(numW - 1) = uint32(bitshift(uint64(numB), -32));
|
||||
mW(numW) = uint32(bitshift(bitshift(uint64(numB), 32), -32));
|
||||
|
||||
% Process the message in successive 512-bit chs
|
||||
for cId = 1 : double(numC)
|
||||
cSt = (cId - 1) * 16 + 1;
|
||||
cEnd = cId * 16;
|
||||
ch = mW(cSt : cEnd);
|
||||
|
||||
% Extend the sixteen 32-bit words into eighty 32-bit words
|
||||
for j = 17 : 80
|
||||
ch(j) = ch(j - 3);
|
||||
ch(j) = bitxor(ch(j), ch(j - 8));
|
||||
ch(j) = bitxor(ch(j), ch(j - 14));
|
||||
ch(j) = bitxor(ch(j), ch(j - 16));
|
||||
ch(j) = bitrotate(ch(j), 1);
|
||||
end
|
||||
|
||||
% Initialize hash value for this ch
|
||||
a = h0;
|
||||
b = h1;
|
||||
c = h2;
|
||||
d = h3;
|
||||
e = h4;
|
||||
|
||||
% Main loop
|
||||
for i = 1 : 80
|
||||
if(i >= 1 && i <= 20)
|
||||
f = bitor(bitand(b, c), bitand(bitcmp(b), d));
|
||||
k = uint32(1518500249);
|
||||
elseif(i >= 21 && i <= 40)
|
||||
f = bitxor(bitxor(b, c), d);
|
||||
k = uint32(1859775393);
|
||||
elseif(i >= 41 && i <= 60)
|
||||
f = bitor(bitor(bitand(b, c), bitand(b, d)), bitand(c, d));
|
||||
k = uint32(2400959708);
|
||||
elseif(i >= 61 && i <= 80)
|
||||
f = bitxor(bitxor(b, c), d);
|
||||
k = uint32(3395469782);
|
||||
end
|
||||
|
||||
t = bitrotate(a, 5);
|
||||
t = bitadd(t, f);
|
||||
t = bitadd(t, e);
|
||||
t = bitadd(t, k);
|
||||
t = bitadd(t, ch(i));
|
||||
e = d;
|
||||
d = c;
|
||||
c = bitrotate(b, 30);
|
||||
b = a;
|
||||
a = t;
|
||||
|
||||
end
|
||||
h0 = bitadd(h0, a);
|
||||
h1 = bitadd(h1, b);
|
||||
h2 = bitadd(h2, c);
|
||||
h3 = bitadd(h3, d);
|
||||
h4 = bitadd(h4, e);
|
||||
|
||||
end
|
||||
|
||||
hash = reshape(dec2hex(double([h0 h1 h2 h3 h4]), 8)', [1 40]);
|
||||
|
||||
hash = lower(hash);
|
||||
|
||||
end
|
||||
|
||||
function ret = bitadd(iA, iB)
|
||||
ret = double(iA) + double(iB);
|
||||
ret = bitset(ret, 33, 0);
|
||||
ret = uint32(ret);
|
||||
end
|
||||
|
||||
function ret = bitrotate(iA, places)
|
||||
t = bitshift(iA, places - 32);
|
||||
ret = bitshift(iA, places);
|
||||
ret = bitor(ret, t);
|
||||
end
|
||||
354
5/data science/r/12.1/mlclass-ex4/submitWeb.m
Normal file
354
5/data science/r/12.1/mlclass-ex4/submitWeb.m
Normal file
@ -0,0 +1,354 @@
|
||||
function submitWeb(partId)
|
||||
%SUBMITWEB Generates a base64 encoded string for web-based submissions
|
||||
% SUBMITWEB() will generate a base64 encoded string so that you can submit your
|
||||
% solutions via a web form
|
||||
|
||||
fprintf('==\n== [ml-class] Submitting Solutions | Programming Exercise %s\n==\n', ...
|
||||
homework_id());
|
||||
if ~exist('partId', 'var') || isempty(partId)
|
||||
partId = promptPart();
|
||||
end
|
||||
|
||||
% Check valid partId
|
||||
partNames = validParts();
|
||||
if ~isValidPartId(partId)
|
||||
fprintf('!! Invalid homework part selected.\n');
|
||||
fprintf('!! Expected an integer from 1 to %d.\n', numel(partNames));
|
||||
fprintf('!! Submission Cancelled\n');
|
||||
return
|
||||
end
|
||||
|
||||
[login] = loginPrompt();
|
||||
if isempty(login)
|
||||
fprintf('!! Submission Cancelled\n');
|
||||
return
|
||||
end
|
||||
|
||||
[result] = submitSolution(login, partId, output(partId), ...
|
||||
source(partId));
|
||||
result = base64encode(result);
|
||||
|
||||
fprintf('\nSave as submission file [submit_ex%s_part%d.txt]: ', ...
|
||||
homework_id(), partId);
|
||||
saveAsFile = input('', 's');
|
||||
if (isempty(saveAsFile))
|
||||
saveAsFile = sprintf('submit_ex%s_part%d.txt', homework_id(), partId);
|
||||
end
|
||||
|
||||
fid = fopen(saveAsFile, 'w');
|
||||
if (fid)
|
||||
fwrite(fid, result);
|
||||
fclose(fid);
|
||||
fprintf('\nSaved your solutions to %s.\n\n', saveAsFile);
|
||||
fprintf(['You can now submit your solutions through the web \n' ...
|
||||
'form in the programming exercises. Select the corresponding \n' ...
|
||||
'programming exercise to access the form.\n']);
|
||||
|
||||
else
|
||||
fprintf('Unable to save to %s\n\n', saveAsFile);
|
||||
fprintf(['You can create a submission file by saving the \n' ...
|
||||
'following text in a file: (press enter to continue)\n\n']);
|
||||
pause;
|
||||
fprintf(result);
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
% ================== CONFIGURABLES FOR EACH HOMEWORK ==================
|
||||
|
||||
|
||||
function id = homework_id()
|
||||
id = '4';
|
||||
end
|
||||
|
||||
function [partNames] = validParts()
|
||||
partNames = { 'Feedforward and Cost Function', ...
|
||||
'Regularized Cost Function', ...
|
||||
'Sigmoid Gradient', ...
|
||||
'Neural Network Gradient (Backpropagation)' ...
|
||||
'Regularized Gradient' ...
|
||||
};
|
||||
end
|
||||
|
||||
function srcs = sources()
|
||||
% Separated by part
|
||||
srcs = { { 'nnCostFunction.m' }, ...
|
||||
{ 'nnCostFunction.m' }, ...
|
||||
{ 'sigmoidGradient.m' }, ...
|
||||
{ 'nnCostFunction.m' }, ...
|
||||
{ 'nnCostFunction.m' } };
|
||||
end
|
||||
|
||||
function out = output(partId)
|
||||
% Random Test Cases
|
||||
X = reshape(3 * sin(1:1:30), 3, 10);
|
||||
Xm = reshape(sin(1:32), 16, 2) / 5;
|
||||
ym = 1 + mod(1:16,4)';
|
||||
t1 = sin(reshape(1:2:24, 4, 3));
|
||||
t2 = cos(reshape(1:2:40, 4, 5));
|
||||
t = [t1(:) ; t2(:)];
|
||||
if partId == 1
|
||||
[J] = nnCostFunction(t, 2, 4, 4, Xm, ym, 0);
|
||||
out = sprintf('%0.5f ', J);
|
||||
elseif partId == 2
|
||||
[J] = nnCostFunction(t, 2, 4, 4, Xm, ym, 1.5);
|
||||
out = sprintf('%0.5f ', J);
|
||||
elseif partId == 3
|
||||
out = sprintf('%0.5f ', sigmoidGradient(X));
|
||||
elseif partId == 4
|
||||
[J, grad] = nnCostFunction(t, 2, 4, 4, Xm, ym, 0);
|
||||
out = sprintf('%0.5f ', J);
|
||||
out = [out sprintf('%0.5f ', grad)];
|
||||
elseif partId == 5
|
||||
[J, grad] = nnCostFunction(t, 2, 4, 4, Xm, ym, 1.5);
|
||||
out = sprintf('%0.5f ', J);
|
||||
out = [out sprintf('%0.5f ', grad)];
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
% ========================= SUBMIT HELPERS =========================
|
||||
|
||||
function src = source(partId)
|
||||
src = '';
|
||||
src_files = sources();
|
||||
if partId <= numel(src_files)
|
||||
flist = src_files{partId};
|
||||
for i = 1:numel(flist)
|
||||
fid = fopen(flist{i});
|
||||
while ~feof(fid)
|
||||
line = fgets(fid);
|
||||
src = [src line];
|
||||
end
|
||||
fclose(fid);
|
||||
src = [src '||||||||'];
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
function ret = isValidPartId(partId)
|
||||
partNames = validParts();
|
||||
ret = (~isempty(partId)) && (partId >= 1) && (partId <= numel(partNames));
|
||||
end
|
||||
|
||||
function partId = promptPart()
|
||||
fprintf('== Select which part(s) to submit:\n', ...
|
||||
homework_id());
|
||||
partNames = validParts();
|
||||
srcFiles = sources();
|
||||
for i = 1:numel(partNames)
|
||||
fprintf('== %d) %s [', i, partNames{i});
|
||||
fprintf(' %s ', srcFiles{i}{:});
|
||||
fprintf(']\n');
|
||||
end
|
||||
fprintf('\nEnter your choice [1-%d]: ', ...
|
||||
numel(partNames));
|
||||
selPart = input('', 's');
|
||||
partId = str2num(selPart);
|
||||
if ~isValidPartId(partId)
|
||||
partId = -1;
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
function [result, str] = submitSolution(email, part, output, source)
|
||||
|
||||
result = ['a:5:{' ...
|
||||
p_s('homework') p_s64(homework_id()) ...
|
||||
p_s('part') p_s64(part) ...
|
||||
p_s('email') p_s64(email) ...
|
||||
p_s('output') p_s64(output) ...
|
||||
p_s('source') p_s64(source) ...
|
||||
'}'];
|
||||
|
||||
end
|
||||
|
||||
function s = p_s(str)
|
||||
s = ['s:' num2str(numel(str)) ':"' str '";'];
|
||||
end
|
||||
|
||||
function s = p_s64(str)
|
||||
str = base64encode(str, '');
|
||||
s = ['s:' num2str(numel(str)) ':"' str '";'];
|
||||
end
|
||||
|
||||
% =========================== LOGIN HELPERS ===========================
|
||||
|
||||
function [login] = loginPrompt()
|
||||
% Prompt for password
|
||||
[login] = basicPrompt();
|
||||
end
|
||||
|
||||
|
||||
function [login] = basicPrompt()
|
||||
login = input('Login (Email address): ', 's');
|
||||
end
|
||||
|
||||
|
||||
% =========================== Base64 Encoder ============================
|
||||
% Thanks to Peter John Acklam
|
||||
%
|
||||
|
||||
function y = base64encode(x, eol)
|
||||
%BASE64ENCODE Perform base64 encoding on a string.
|
||||
%
|
||||
% BASE64ENCODE(STR, EOL) encode the given string STR. EOL is the line ending
|
||||
% sequence to use; it is optional and defaults to '\n' (ASCII decimal 10).
|
||||
% The returned encoded string is broken into lines of no more than 76
|
||||
% characters each, and each line will end with EOL unless it is empty. Let
|
||||
% EOL be empty if you do not want the encoded string broken into lines.
|
||||
%
|
||||
% STR and EOL don't have to be strings (i.e., char arrays). The only
|
||||
% requirement is that they are vectors containing values in the range 0-255.
|
||||
%
|
||||
% This function may be used to encode strings into the Base64 encoding
|
||||
% specified in RFC 2045 - MIME (Multipurpose Internet Mail Extensions). The
|
||||
% Base64 encoding is designed to represent arbitrary sequences of octets in a
|
||||
% form that need not be humanly readable. A 65-character subset
|
||||
% ([A-Za-z0-9+/=]) of US-ASCII is used, enabling 6 bits to be represented per
|
||||
% printable character.
|
||||
%
|
||||
% Examples
|
||||
% --------
|
||||
%
|
||||
% If you want to encode a large file, you should encode it in chunks that are
|
||||
% a multiple of 57 bytes. This ensures that the base64 lines line up and
|
||||
% that you do not end up with padding in the middle. 57 bytes of data fills
|
||||
% one complete base64 line (76 == 57*4/3):
|
||||
%
|
||||
% If ifid and ofid are two file identifiers opened for reading and writing,
|
||||
% respectively, then you can base64 encode the data with
|
||||
%
|
||||
% while ~feof(ifid)
|
||||
% fwrite(ofid, base64encode(fread(ifid, 60*57)));
|
||||
% end
|
||||
%
|
||||
% or, if you have enough memory,
|
||||
%
|
||||
% fwrite(ofid, base64encode(fread(ifid)));
|
||||
%
|
||||
% See also BASE64DECODE.
|
||||
|
||||
% Author: Peter John Acklam
|
||||
% Time-stamp: 2004-02-03 21:36:56 +0100
|
||||
% E-mail: pjacklam@online.no
|
||||
% URL: http://home.online.no/~pjacklam
|
||||
|
||||
if isnumeric(x)
|
||||
x = num2str(x);
|
||||
end
|
||||
|
||||
% make sure we have the EOL value
|
||||
if nargin < 2
|
||||
eol = sprintf('\n');
|
||||
else
|
||||
if sum(size(eol) > 1) > 1
|
||||
error('EOL must be a vector.');
|
||||
end
|
||||
if any(eol(:) > 255)
|
||||
error('EOL can not contain values larger than 255.');
|
||||
end
|
||||
end
|
||||
|
||||
if sum(size(x) > 1) > 1
|
||||
error('STR must be a vector.');
|
||||
end
|
||||
|
||||
x = uint8(x);
|
||||
eol = uint8(eol);
|
||||
|
||||
ndbytes = length(x); % number of decoded bytes
|
||||
nchunks = ceil(ndbytes / 3); % number of chunks/groups
|
||||
nebytes = 4 * nchunks; % number of encoded bytes
|
||||
|
||||
% add padding if necessary, to make the length of x a multiple of 3
|
||||
if rem(ndbytes, 3)
|
||||
x(end+1 : 3*nchunks) = 0;
|
||||
end
|
||||
|
||||
x = reshape(x, [3, nchunks]); % reshape the data
|
||||
y = repmat(uint8(0), 4, nchunks); % for the encoded data
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
% Split up every 3 bytes into 4 pieces
|
||||
%
|
||||
% aaaaaabb bbbbcccc ccdddddd
|
||||
%
|
||||
% to form
|
||||
%
|
||||
% 00aaaaaa 00bbbbbb 00cccccc 00dddddd
|
||||
%
|
||||
y(1,:) = bitshift(x(1,:), -2); % 6 highest bits of x(1,:)
|
||||
|
||||
y(2,:) = bitshift(bitand(x(1,:), 3), 4); % 2 lowest bits of x(1,:)
|
||||
y(2,:) = bitor(y(2,:), bitshift(x(2,:), -4)); % 4 highest bits of x(2,:)
|
||||
|
||||
y(3,:) = bitshift(bitand(x(2,:), 15), 2); % 4 lowest bits of x(2,:)
|
||||
y(3,:) = bitor(y(3,:), bitshift(x(3,:), -6)); % 2 highest bits of x(3,:)
|
||||
|
||||
y(4,:) = bitand(x(3,:), 63); % 6 lowest bits of x(3,:)
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
% Now perform the following mapping
|
||||
%
|
||||
% 0 - 25 -> A-Z
|
||||
% 26 - 51 -> a-z
|
||||
% 52 - 61 -> 0-9
|
||||
% 62 -> +
|
||||
% 63 -> /
|
||||
%
|
||||
% We could use a mapping vector like
|
||||
%
|
||||
% ['A':'Z', 'a':'z', '0':'9', '+/']
|
||||
%
|
||||
% but that would require an index vector of class double.
|
||||
%
|
||||
z = repmat(uint8(0), size(y));
|
||||
i = y <= 25; z(i) = 'A' + double(y(i));
|
||||
i = 26 <= y & y <= 51; z(i) = 'a' - 26 + double(y(i));
|
||||
i = 52 <= y & y <= 61; z(i) = '0' - 52 + double(y(i));
|
||||
i = y == 62; z(i) = '+';
|
||||
i = y == 63; z(i) = '/';
|
||||
y = z;
|
||||
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
% Add padding if necessary.
|
||||
%
|
||||
npbytes = 3 * nchunks - ndbytes; % number of padding bytes
|
||||
if npbytes
|
||||
y(end-npbytes+1 : end) = '='; % '=' is used for padding
|
||||
end
|
||||
|
||||
if isempty(eol)
|
||||
|
||||
% reshape to a row vector
|
||||
y = reshape(y, [1, nebytes]);
|
||||
|
||||
else
|
||||
|
||||
nlines = ceil(nebytes / 76); % number of lines
|
||||
neolbytes = length(eol); % number of bytes in eol string
|
||||
|
||||
% pad data so it becomes a multiple of 76 elements
|
||||
y = [y(:) ; zeros(76 * nlines - numel(y), 1)];
|
||||
y(nebytes + 1 : 76 * nlines) = 0;
|
||||
y = reshape(y, 76, nlines);
|
||||
|
||||
% insert eol strings
|
||||
eol = eol(:);
|
||||
y(end + 1 : end + neolbytes, :) = eol(:, ones(1, nlines));
|
||||
|
||||
% remove padding, but keep the last eol string
|
||||
m = nebytes + neolbytes * (nlines - 1);
|
||||
n = (76+neolbytes)*nlines - neolbytes;
|
||||
y(m+1 : n) = '';
|
||||
|
||||
% extract and reshape to row vector
|
||||
y = reshape(y, 1, m+neolbytes);
|
||||
|
||||
end
|
||||
|
||||
% output is a character array
|
||||
y = char(y);
|
||||
|
||||
end
|
||||
59
5/data science/r/12.Rmd
Normal file
59
5/data science/r/12.Rmd
Normal file
@ -0,0 +1,59 @@
|
||||
---
|
||||
title: "Lab11: NLP"
|
||||
author: "Vladislav Litvinov <vlad@sek1ro>"
|
||||
output:
|
||||
pdf_document:
|
||||
toc_float: TRUE
|
||||
---
|
||||
How does TF-IDF enhance the relevance of a search result?
|
||||
Why reduce dimensions in text analysis?
|
||||
Curse of dimensionality
|
||||
Computational cost
|
||||
Overfitting
|
||||
|
||||
Feature selection
|
||||
Neural embeddings
|
||||
```{r}
|
||||
library("tm")
|
||||
library("wordcloud")
|
||||
library("stringr")
|
||||
data("crude")
|
||||
|
||||
docs = unlist(str_split(crude[[2]]$content, "(?<=[.!?])\\s+"))
|
||||
|
||||
docs = VCorpus(VectorSource(docs))
|
||||
|
||||
docs = tm_map(docs, content_transformer(tolower))
|
||||
docs = tm_map(docs, removeNumbers)
|
||||
docs = tm_map(docs, removeWords, stopwords("english"))
|
||||
docs = tm_map(docs, removePunctuation)
|
||||
docs = tm_map(docs, stripWhitespace)
|
||||
|
||||
sapply(docs, content)
|
||||
```
|
||||
```{r}
|
||||
docs = docs[-c(10, 18)]
|
||||
sapply(docs, content)
|
||||
```
|
||||
```{r}
|
||||
showWordCloud = function(tdm) {
|
||||
m = as.matrix(tdm)
|
||||
v = sort(rowSums(m),decreasing=TRUE)
|
||||
d = data.frame(word = names(v),freq=v)
|
||||
print(d)
|
||||
wordcloud(
|
||||
words = d$word,
|
||||
freq = d$freq,
|
||||
min.freq = 0,
|
||||
max.words = 50,
|
||||
random.order=FALSE,
|
||||
)
|
||||
}
|
||||
|
||||
showWordCloud(TermDocumentMatrix(docs))
|
||||
```
|
||||
|
||||
```{r}
|
||||
tdm = TermDocumentMatrix(docs, control = list(weighting = weightTfIdf))
|
||||
showWordCloud(tdm)
|
||||
```
|
||||
BIN
5/data science/r/12/ex3.pdf
Normal file
BIN
5/data science/r/12/ex3.pdf
Normal file
Binary file not shown.
59
5/data science/r/12/mlclass-ex3/displayData.m
Normal file
59
5/data science/r/12/mlclass-ex3/displayData.m
Normal file
@ -0,0 +1,59 @@
|
||||
function [h, display_array] = displayData(X, example_width)
|
||||
%DISPLAYDATA Display 2D data in a nice grid
|
||||
% [h, display_array] = DISPLAYDATA(X, example_width) displays 2D data
|
||||
% stored in X in a nice grid. It returns the figure handle h and the
|
||||
% displayed array if requested.
|
||||
|
||||
% Set example_width automatically if not passed in
|
||||
if ~exist('example_width', 'var') || isempty(example_width)
|
||||
example_width = round(sqrt(size(X, 2)));
|
||||
end
|
||||
|
||||
% Gray Image
|
||||
colormap(gray);
|
||||
|
||||
% Compute rows, cols
|
||||
[m n] = size(X);
|
||||
example_height = (n / example_width);
|
||||
|
||||
% Compute number of items to display
|
||||
display_rows = floor(sqrt(m));
|
||||
display_cols = ceil(m / display_rows);
|
||||
|
||||
% Between images padding
|
||||
pad = 1;
|
||||
|
||||
% Setup blank display
|
||||
display_array = - ones(pad + display_rows * (example_height + pad), ...
|
||||
pad + display_cols * (example_width + pad));
|
||||
|
||||
% Copy each example into a patch on the display array
|
||||
curr_ex = 1;
|
||||
for j = 1:display_rows
|
||||
for i = 1:display_cols
|
||||
if curr_ex > m,
|
||||
break;
|
||||
end
|
||||
% Copy the patch
|
||||
|
||||
% Get the max value of the patch
|
||||
max_val = max(abs(X(curr_ex, :)));
|
||||
display_array(pad + (j - 1) * (example_height + pad) + (1:example_height), ...
|
||||
pad + (i - 1) * (example_width + pad) + (1:example_width)) = ...
|
||||
reshape(X(curr_ex, :), example_height, example_width) / max_val;
|
||||
curr_ex = curr_ex + 1;
|
||||
end
|
||||
if curr_ex > m,
|
||||
break;
|
||||
end
|
||||
end
|
||||
|
||||
% Display Image
|
||||
h = imagesc(display_array, [-1 1]);
|
||||
|
||||
% Do not show axis
|
||||
axis image off
|
||||
|
||||
drawnow;
|
||||
|
||||
end
|
||||
69
5/data science/r/12/mlclass-ex3/ex3.m
Normal file
69
5/data science/r/12/mlclass-ex3/ex3.m
Normal file
@ -0,0 +1,69 @@
|
||||
%% Machine Learning Online Class - Exercise 3 | Part 1: One-vs-all
|
||||
|
||||
% Instructions
|
||||
% ------------
|
||||
%
|
||||
% This file contains code that helps you get started on the
|
||||
% linear exercise. You will need to complete the following functions
|
||||
% in this exericse:
|
||||
%
|
||||
% lrCostFunction.m (logistic regression cost function)
|
||||
% oneVsAll.m
|
||||
% predictOneVsAll.m
|
||||
% predict.m
|
||||
%
|
||||
% For this exercise, you will not need to change any code in this file,
|
||||
% or any other files other than those mentioned above.
|
||||
%
|
||||
|
||||
%% Initialization
|
||||
clear ; close all; clc
|
||||
|
||||
%% Setup the parameters you will use for this part of the exercise
|
||||
input_layer_size = 400; % 20x20 Input Images of Digits
|
||||
num_labels = 10; % 10 labels, from 1 to 10
|
||||
% (note that we have mapped "0" to label 10)
|
||||
|
||||
%% =========== Part 1: Loading and Visualizing Data =============
|
||||
% We start the exercise by first loading and visualizing the dataset.
|
||||
% You will be working with a dataset that contains handwritten digits.
|
||||
%
|
||||
|
||||
% Load Training Data
|
||||
fprintf('Loading and Visualizing Data ...\n')
|
||||
|
||||
load('ex3data1.mat'); % training data stored in arrays X, y
|
||||
m = size(X, 1);
|
||||
|
||||
% Randomly select 100 data points to display
|
||||
rand_indices = randperm(m);
|
||||
sel = X(rand_indices(1:100), :);
|
||||
|
||||
displayData(sel);
|
||||
|
||||
fprintf('Program paused. Press enter to continue.\n');
|
||||
pause;
|
||||
|
||||
%% ============ Part 2: Vectorize Logistic Regression ============
|
||||
% In this part of the exercise, you will reuse your logistic regression
|
||||
% code from the last exercise. You task here is to make sure that your
|
||||
% regularized logistic regression implementation is vectorized. After
|
||||
% that, you will implement one-vs-all classification for the handwritten
|
||||
% digit dataset.
|
||||
%
|
||||
|
||||
fprintf('\nTraining One-vs-All Logistic Regression...\n')
|
||||
|
||||
lambda = 0.1;
|
||||
[all_theta] = oneVsAll(X, y, num_labels, lambda);
|
||||
|
||||
fprintf('Program paused. Press enter to continue.\n');
|
||||
pause;
|
||||
|
||||
|
||||
%% ================ Part 3: Predict for One-Vs-All ================
|
||||
% After ...
|
||||
pred = predictOneVsAll(all_theta, X);
|
||||
|
||||
fprintf('\nTraining Set Accuracy: %f\n', mean(double(pred == y)) * 100);
|
||||
|
||||
88
5/data science/r/12/mlclass-ex3/ex3_nn.m
Normal file
88
5/data science/r/12/mlclass-ex3/ex3_nn.m
Normal file
@ -0,0 +1,88 @@
|
||||
%% Machine Learning Online Class - Exercise 3 | Part 2: Neural Networks
|
||||
|
||||
% Instructions
|
||||
% ------------
|
||||
%
|
||||
% This file contains code that helps you get started on the
|
||||
% linear exercise. You will need to complete the following functions
|
||||
% in this exericse:
|
||||
%
|
||||
% lrCostFunction.m (logistic regression cost function)
|
||||
% oneVsAll.m
|
||||
% predictOneVsAll.m
|
||||
% predict.m
|
||||
%
|
||||
% For this exercise, you will not need to change any code in this file,
|
||||
% or any other files other than those mentioned above.
|
||||
%
|
||||
|
||||
%% Initialization
|
||||
clear ; close all; clc
|
||||
|
||||
%% Setup the parameters you will use for this exercise
|
||||
input_layer_size = 400; % 20x20 Input Images of Digits
|
||||
hidden_layer_size = 25; % 25 hidden units
|
||||
num_labels = 10; % 10 labels, from 1 to 10
|
||||
% (note that we have mapped "0" to label 10)
|
||||
|
||||
%% =========== Part 1: Loading and Visualizing Data =============
|
||||
% We start the exercise by first loading and visualizing the dataset.
|
||||
% You will be working with a dataset that contains handwritten digits.
|
||||
%
|
||||
|
||||
% Load Training Data
|
||||
fprintf('Loading and Visualizing Data ...\n')
|
||||
|
||||
load('ex3data1.mat');
|
||||
m = size(X, 1);
|
||||
|
||||
% Randomly select 100 data points to display
|
||||
sel = randperm(size(X, 1));
|
||||
sel = sel(1:100);
|
||||
|
||||
displayData(X(sel, :));
|
||||
|
||||
fprintf('Program paused. Press enter to continue.\n');
|
||||
pause;
|
||||
|
||||
%% ================ Part 2: Loading Pameters ================
|
||||
% In this part of the exercise, we load some pre-initialized
|
||||
% neural network parameters.
|
||||
|
||||
fprintf('\nLoading Saved Neural Network Parameters ...\n')
|
||||
|
||||
% Load the weights into variables Theta1 and Theta2
|
||||
load('ex3weights.mat');
|
||||
|
||||
%% ================= Part 3: Implement Predict =================
|
||||
% After training the neural network, we would like to use it to predict
|
||||
% the labels. You will now implement the "predict" function to use the
|
||||
% neural network to predict the labels of the training set. This lets
|
||||
% you compute the training set accuracy.
|
||||
|
||||
pred = predict(Theta1, Theta2, X);
|
||||
|
||||
fprintf('\nTraining Set Accuracy: %f\n', mean(double(pred == y)) * 100);
|
||||
|
||||
fprintf('Program paused. Press enter to continue.\n');
|
||||
pause;
|
||||
|
||||
% To give you an idea of the network's output, you can also run
|
||||
% through the examples one at the a time to see what it is predicting.
|
||||
|
||||
% Randomly permute examples
|
||||
rp = randperm(m);
|
||||
|
||||
for i = 1:m
|
||||
% Display
|
||||
fprintf('\nDisplaying Example Image\n');
|
||||
displayData(X(rp(i), :));
|
||||
|
||||
pred = predict(Theta1, Theta2, X(rp(i),:));
|
||||
fprintf('\nNeural Network Prediction: %d (digit %d)\n', pred, mod(pred, 10));
|
||||
|
||||
% Pause
|
||||
fprintf('Program paused. Press enter to continue.\n');
|
||||
pause;
|
||||
end
|
||||
|
||||
BIN
5/data science/r/12/mlclass-ex3/ex3data1.mat
Normal file
BIN
5/data science/r/12/mlclass-ex3/ex3data1.mat
Normal file
Binary file not shown.
BIN
5/data science/r/12/mlclass-ex3/ex3weights.mat
Normal file
BIN
5/data science/r/12/mlclass-ex3/ex3weights.mat
Normal file
Binary file not shown.
175
5/data science/r/12/mlclass-ex3/fmincg.m
Normal file
175
5/data science/r/12/mlclass-ex3/fmincg.m
Normal file
@ -0,0 +1,175 @@
|
||||
function [X, fX, i] = fmincg(f, X, options, P1, P2, P3, P4, P5)
% Minimize a continuous differentiable multivariate function. Starting point
% is given by "X" (D by 1), and the function named in the string "f", must
% return a function value and a vector of partial derivatives. The Polack-
% Ribiere flavour of conjugate gradients is used to compute search directions,
% and a line search using quadratic and cubic polynomial approximations and the
% Wolfe-Powell stopping criteria is used together with the slope ratio method
% for guessing initial step sizes. Additionally a bunch of checks are made to
% make sure that exploration is taking place and that extrapolation will not
% be unboundedly large. The "length" gives the length of the run: if it is
% positive, it gives the maximum number of line searches, if negative its
% absolute gives the maximum allowed number of function evaluations. You can
% (optionally) give "length" a second component, which will indicate the
% reduction in function value to be expected in the first line-search (defaults
% to 1.0). The function returns when either its length is up, or if no further
% progress can be made (ie, we are at a minimum, or so close that due to
% numerical problems, we cannot get any closer). If the function terminates
% within a few iterations, it could be an indication that the function value
% and derivatives are not consistent (ie, there may be a bug in the
% implementation of your "f" function). The function returns the found
% solution "X", a vector of function values "fX" indicating the progress made
% and "i" the number of iterations (line searches or function evaluations,
% depending on the sign of "length") used.
%
% Usage: [X, fX, i] = minimize(X, f, length, P1, P2, P3, P4, P5)
%
% See also: checkgrad
%
% Copyright (C) 2001 and 2002 by Carl Edward Rasmussen. Date 2002-02-13
%
%
% (C) Copyright 1999, 2000 & 2001, Carl Edward Rasmussen
%
% Permission is granted for anyone to copy, use, or modify these
% programs and accompanying documents for purposes of research or
% education, provided this copyright notice is retained, and note is
% made of any changes that have been made.
%
% These programs and documents are distributed without any warranty,
% express or implied. As the programs were written for research
% purposes only, they have not been tested to the degree that would be
% advisable in any important application. All use of these programs is
% entirely at the user's own risk.
%
% [ml-class] Changes Made:
% 1) Function name and argument specifications
% 2) Output display
%

% Read options: MaxIter (when supplied) caps the number of line searches;
% otherwise default to 100. NOTE: this deliberately shadows the builtin
% `length` — the rest of the routine treats it as the run-length scalar.
if exist('options', 'var') && ~isempty(options) && isfield(options, 'MaxIter')
    length = options.MaxIter;
else
    length = 100;
end

RHO = 0.01;                            % a bunch of constants for line searches
SIG = 0.5;       % RHO and SIG are the constants in the Wolfe-Powell conditions
INT = 0.1;    % don't reevaluate within 0.1 of the limit of the current bracket
EXT = 3.0;                    % extrapolate maximum 3 times the current bracket
MAX = 20;                         % max 20 function evaluations per line search
RATIO = 100;                                      % maximum allowed slope ratio

% Compose the call string "feval(f, X, P1, ..., Pk)" for however many of the
% optional parameters P1..P5 were actually passed in.
argstr = ['feval(f, X'];              % compose string used to call function
for i = 1:(nargin - 3)
  argstr = [argstr, ',P', int2str(i)];
end
argstr = [argstr, ')'];

if max(size(length)) == 2, red=length(2); length=length(1); else red=1; end
S=['Iteration '];

i = 0;                                            % zero the run length counter
ls_failed = 0;                             % no previous line search has failed
fX = [];
[f1 df1] = eval(argstr);                      % get function value and gradient
i = i + (length<0);                                            % count epochs?!
s = -df1;                                        % search direction is steepest
d1 = -s'*s;                                                 % this is the slope
z1 = red/(1-d1);                                  % initial step is red/(|s|+1)

while i < abs(length)                                      % while not finished
  i = i + (length>0);                                      % count iterations?!

  X0 = X; f0 = f1; df0 = df1;                   % make a copy of current values
  X = X + z1*s;                                             % begin line search
  [f2 df2] = eval(argstr);
  i = i + (length<0);                                          % count epochs?!
  d2 = df2'*s;
  f3 = f1; d3 = d1; z3 = -z1;             % initialize point 3 equal to point 1
  if length>0, M = MAX; else M = min(MAX, -length-i); end
  success = 0; limit = -1;                              % initialize quantities
  while 1
    while ((f2 > f1+z1*RHO*d1) | (d2 > -SIG*d1)) & (M > 0)
      limit = z1;                                         % tighten the bracket
      if f2 > f1
        z2 = z3 - (0.5*d3*z3*z3)/(d3*z3+f2-f3);                 % quadratic fit
      else
        A = 6*(f2-f3)/z3+3*(d2+d3);                                 % cubic fit
        B = 3*(f3-f2)-z3*(d3+2*d2);
        z2 = (sqrt(B*B-A*d2*z3*z3)-B)/A;       % numerical error possible - ok!
      end
      if isnan(z2) | isinf(z2)
        z2 = z3/2;                  % if we had a numerical problem then bisect
      end
      z2 = max(min(z2, INT*z3),(1-INT)*z3);  % don't accept too close to limits
      z1 = z1 + z2;                                           % update the step
      X = X + z2*s;
      [f2 df2] = eval(argstr);
      M = M - 1; i = i + (length<0);                           % count epochs?!
      d2 = df2'*s;
      z3 = z3-z2;                    % z3 is now relative to the location of z2
    end
    if f2 > f1+z1*RHO*d1 | d2 > -SIG*d1
      break;                                                % this is a failure
    elseif d2 > SIG*d1
      success = 1; break;                                             % success
    elseif M == 0
      break;                                                          % failure
    end
    A = 6*(f2-f3)/z3+3*(d2+d3);                      % make cubic extrapolation
    B = 3*(f3-f2)-z3*(d3+2*d2);
    z2 = -d2*z3*z3/(B+sqrt(B*B-A*d2*z3*z3));        % num. error possible - ok!
    if ~isreal(z2) | isnan(z2) | isinf(z2) | z2 < 0   % num prob or wrong sign?
      if limit < -0.5                               % if we have no upper limit
        z2 = z1 * (EXT-1);                 % the extrapolate the maximum amount
      else
        z2 = (limit-z1)/2;                                   % otherwise bisect
      end
    elseif (limit > -0.5) & (z2+z1 > limit)        % extrapolation beyond max?
      z2 = (limit-z1)/2;                                               % bisect
    elseif (limit < -0.5) & (z2+z1 > z1*EXT)       % extrapolation beyond limit
      z2 = z1*(EXT-1.0);                           % set to extrapolation limit
    elseif z2 < -z3*INT
      z2 = -z3*INT;
    elseif (limit > -0.5) & (z2 < (limit-z1)*(1.0-INT))   % too close to limit?
      z2 = (limit-z1)*(1.0-INT);
    end
    f3 = f2; d3 = d2; z3 = -z2;                  % set point 3 equal to point 2
    z1 = z1 + z2; X = X + z2*s;                      % update current estimates
    [f2 df2] = eval(argstr);
    M = M - 1; i = i + (length<0);                             % count epochs?!
    d2 = df2'*s;
  end                                                      % end of line search

  if success                                         % if line search succeeded
    f1 = f2; fX = [fX' f1]';
    fprintf('%s %4i | Cost: %4.6e\r', S, i, f1);
    s = (df2'*df2-df1'*df2)/(df1'*df1)*s - df2;      % Polack-Ribiere direction
    tmp = df1; df1 = df2; df2 = tmp;                         % swap derivatives
    d2 = df1'*s;
    if d2 > 0                                      % new slope must be negative
      s = -df1;                              % otherwise use steepest direction
      d2 = -s'*s;
    end
    z1 = z1 * min(RATIO, d1/(d2-realmin));          % slope ratio but max RATIO
    d1 = d2;
    ls_failed = 0;                              % this line search did not fail
  else
    X = X0; f1 = f0; df1 = df0;  % restore point from before failed line search
    if ls_failed | i > abs(length)          % line search failed twice in a row
      break;                             % or we ran out of time, so we give up
    end
    tmp = df1; df1 = df2; df2 = tmp;                         % swap derivatives
    s = -df1;                                                    % try steepest
    d1 = -s'*s;
    z1 = 1/(1-d1);
    ls_failed = 1;                                    % this line search failed
  end
  if exist('OCTAVE_VERSION')
    fflush(stdout);
  end
end
fprintf('\n');
|
||||
57
5/data science/r/12/mlclass-ex3/lrCostFunction.m
Normal file
57
5/data science/r/12/mlclass-ex3/lrCostFunction.m
Normal file
@ -0,0 +1,57 @@
|
||||
function [J, grad] = lrCostFunction(theta, X, y, lambda)
%LRCOSTFUNCTION Compute cost and gradient for regularized logistic regression.
%   [J, grad] = LRCOSTFUNCTION(theta, X, y, lambda) computes the regularized
%   cross-entropy cost J of using theta as the parameters for logistic
%   regression on design matrix X (m x n) with labels y (m x 1, values in
%   {0,1}), plus the gradient of the cost w.r.t. the parameters (same size
%   as theta). By convention the bias parameter theta(1) is NOT regularized.

% Initialize some useful values
m = length(y); % number of training examples

% Vectorized hypothesis for every example: h is m x 1.
h = sigmoid(X * theta);

% Unregularized cross-entropy cost, averaged over the m examples.
base = - y .* log(h) - (1 - y) .* log(1 - h);
J = 1 / m * sum(base, 1);

% Unregularized gradient, vectorized: X' * (h - y) is n x 1.
grad_ = 1 / m * (X' * (h - y));

% Add the regularization term, excluding the bias theta(1) from BOTH the
% cost and the gradient. (Fix: the original added lambda/(2m) * theta'*theta
% to J, which wrongly penalized the bias term as well.)
theta_reg = theta;
theta_reg(1) = 0;
J = J + lambda / (2 * m) * (theta_reg' * theta_reg);
grad = grad_ + theta_reg * lambda / m;

end
|
||||
BIN
5/data science/r/12/mlclass-ex3/octave-workspace
Normal file
BIN
5/data science/r/12/mlclass-ex3/octave-workspace
Normal file
Binary file not shown.
72
5/data science/r/12/mlclass-ex3/oneVsAll.m
Normal file
72
5/data science/r/12/mlclass-ex3/oneVsAll.m
Normal file
@ -0,0 +1,72 @@
|
||||
function [all_theta] = oneVsAll(X, y, num_labels, lambda)
%ONEVSALL Train one regularized logistic regression classifier per label.
%   [all_theta] = ONEVSALL(X, y, num_labels, lambda) fits num_labels
%   classifiers; row c of all_theta holds the parameters of the classifier
%   that separates label c from all other labels.

% Dimensions of the training set.
num_examples = size(X, 1);
num_features = size(X, 2);

% Parameter matrix to be returned: one row per label, bias column included.
all_theta = zeros(num_labels, num_features + 1);

% Prepend the bias column of ones to the data matrix.
X = [ones(num_examples, 1) X];

% Shared optimizer settings: gradient supplied by the cost function,
% at most 50 conjugate-gradient iterations per classifier.
opts = optimset('GradObj', 'on', 'MaxIter', 50);

for label = 1:num_labels
    % Binary targets for the current label, then minimize the regularized
    % cost with fmincg starting from the zero vector.
    start_theta = zeros(num_features + 1, 1);
    cost_fn = @(t)(lrCostFunction(t, X, (y == label), lambda));
    [theta] = fmincg(cost_fn, start_theta, opts);
    all_theta(label, :) = theta';
end

end
|
||||
40
5/data science/r/12/mlclass-ex3/predict.m
Normal file
40
5/data science/r/12/mlclass-ex3/predict.m
Normal file
@ -0,0 +1,40 @@
|
||||
function p = predict(Theta1, Theta2, X)
%PREDICT Predict the label of an input given a trained neural network
%   p = PREDICT(Theta1, Theta2, X) outputs the predicted label of X given the
%   trained weights of a two-layer neural network (Theta1, Theta2).
%   X is m x n; p is an m x 1 vector of labels in 1..num_labels.

% Useful values
m = size(X, 1);
num_labels = size(Theta2, 1);

% You need to return the following variables correctly
p = zeros(size(X, 1), 1);

% Forward propagation, batched over all m examples at once:
%   Theta1 is hidden x (n+1), Theta2 is num_labels x (hidden+1).
X = [ones(m, 1) X];               % add bias feature      -> m x (n+1)
a1 = sigmoid(Theta1 * X');        % hidden activations    -> hidden x m
a1 = [ones(1, size(a1, 2)); a1];  % add bias unit         -> (hidden+1) x m
h = sigmoid(Theta2 * a1);         % output activations    -> num_labels x m
                                  % (fix: semicolon added — the original
                                  % statement echoed the whole matrix h)
[max_probs, p] = max(h);          % column-wise max: 1 x m indices
p = p';                           % return as m x 1 column of labels

end
|
||||
43
5/data science/r/12/mlclass-ex3/predictOneVsAll.m
Normal file
43
5/data science/r/12/mlclass-ex3/predictOneVsAll.m
Normal file
@ -0,0 +1,43 @@
|
||||
function p = predictOneVsAll(all_theta, X)
%PREDICTONEVSALL Predict labels with a trained one-vs-all classifier.
%   p = PREDICTONEVSALL(all_theta, X) returns an m x 1 vector of predicted
%   labels in 1..K, where K = size(all_theta, 1) and row i of all_theta is
%   the trained logistic regression parameter vector for class i
%   (e.g., p = [1; 3; 1; 2] predicts classes 1, 3, 1, 2 for 4 examples).

m = size(X, 1);
num_labels = size(all_theta, 1);

% You need to return the following variables correctly
p = zeros(size(X, 1), 1);

% Prepend the bias column, then score every class for every example:
% scores(i, k) is the sigmoid activation of example i under classifier k.
X = [ones(m, 1) X];
scores = sigmoid(X * all_theta');

% The prediction for each example is the class with the highest score
% in its row (row-wise max; second output of max is the index).
[best_score, p] = max(scores, [], 2);

end
|
||||
6
5/data science/r/12/mlclass-ex3/sigmoid.m
Normal file
6
5/data science/r/12/mlclass-ex3/sigmoid.m
Normal file
@ -0,0 +1,6 @@
|
||||
function g = sigmoid(z)
%SIGMOID Compute the sigmoid function element-wise.
%   g = SIGMOID(z) returns 1 ./ (1 + exp(-z)) for scalar, vector or
%   matrix input z; g has the same size as z.

denom = 1.0 + exp(-z);
g = 1.0 ./ denom;
end
|
||||
333
5/data science/r/12/mlclass-ex3/submit.m
Normal file
333
5/data science/r/12/mlclass-ex3/submit.m
Normal file
@ -0,0 +1,333 @@
|
||||
function submit(partId)
%SUBMIT Submit your code and output to the ml-class servers
%   SUBMIT() will connect to the ml-class server and submit your solution.
%   Prompts for the part to submit and for login/password, then for each
%   selected part: fetches a challenge from the server, answers it with a
%   salted SHA-1 response, and POSTs the generated output plus the source
%   files for grading.

  fprintf('==\n== [ml-class] Submitting Solutions | Programming Exercise %s\n==\n', ...
          homework_id());
  if ~exist('partId', 'var') || isempty(partId)
    partId = promptPart();
  end

  % Check valid partId (1..numParts selects one part, numParts+1 = all)
  partNames = validParts();
  if ~isValidPartId(partId)
    fprintf('!! Invalid homework part selected.\n');
    fprintf('!! Expected an integer from 1 to %d.\n', numel(partNames) + 1);
    fprintf('!! Submission Cancelled\n');
    return
  end

  [login password] = loginPrompt();
  if isempty(login)
    fprintf('!! Submission Cancelled\n');
    return
  end

  fprintf('\n== Connecting to ml-class ... ');
  if exist('OCTAVE_VERSION')
    fflush(stdout);  % Octave buffers stdout; flush so the message shows
  end

  % Setup submit list
  if partId == numel(partNames) + 1
    submitParts = 1:numel(partNames);
  else
    submitParts = [partId];
  end

  for s = 1:numel(submitParts)
    % Submit this part
    partId = submitParts(s);

    % Get Challenge
    [login, ch, signature] = getChallenge(login);
    if isempty(login) || isempty(ch) || isempty(signature)
      % Some error occured, error string in first return element.
      fprintf('\n!! Error: %s\n\n', login);
      return
    end

    % Attempt Submission with Challenge
    ch_resp = challengeResponse(login, password, ch);
    [result, str] = submitSolution(login, ch_resp, partId, output(partId), ...
                                   source(partId), signature);

    fprintf('\n== [ml-class] Submitted Homework %s - Part %d - %s\n', ...
            homework_id(), partId, partNames{partId});
    fprintf('== %s\n', strtrim(str));
    if exist('OCTAVE_VERSION')
      fflush(stdout);
    end
  end

end

% ================== CONFIGURABLES FOR EACH HOMEWORK ==================

function id = homework_id()
  % Identifier of this programming exercise, as the server expects it.
  id = '3';
end

function [partNames] = validParts()
  % Human-readable names of the gradable parts, in submission order.
  partNames = { 'Vectorized Logistic Regression ', ...
                'One-vs-all classifier training', ...
                'One-vs-all classifier prediction', ...
                'Neural network prediction function' ...
              };
end

function srcs = sources()
  % Separated by part: one cell of source-file names per gradable part.
  srcs = { { 'lrCostFunction.m' }, ...
           { 'oneVsAll.m' }, ...
           { 'predictOneVsAll.m' }, ...
           { 'predict.m' } };
end

function out = output(partId)
  % Build the graded output string for partId by running the student's
  % functions on fixed, deterministic test fixtures.

  % Random Test Cases
  X = [ones(20,1) (exp(1) * sin(1:1:20))' (exp(0.5) * cos(1:1:20))'];
  y = sin(X(:,1) + X(:,2)) > 0;
  Xm = [ -1 -1 ; -1 -2 ; -2 -1 ; -2 -2 ; ...
          1 1 ; 1 2 ; 2 1 ; 2 2 ; ...
         -1 1 ; -1 2 ; -2 1 ; -2 2 ; ...
          1 -1 ; 1 -2 ; -2 -1 ; -2 -2 ];
  ym = [ 1 1 1 1 2 2 2 2 3 3 3 3 4 4 4 4 ]';
  t1 = sin(reshape(1:2:24, 4, 3));
  t2 = cos(reshape(1:2:40, 4, 5));

  % Each branch formats the numeric results as space-separated %0.5f values.
  if partId == 1
    [J, grad] = lrCostFunction([0.25 0.5 -0.5]', X, y, 0.1);
    out = sprintf('%0.5f ', J);
    out = [out sprintf('%0.5f ', grad)];
  elseif partId == 2
    out = sprintf('%0.5f ', oneVsAll(Xm, ym, 4, 0.1));
  elseif partId == 3
    out = sprintf('%0.5f ', predictOneVsAll(t1, Xm));
  elseif partId == 4
    out = sprintf('%0.5f ', predict(t1, t2, Xm));
  end
end

function url = challenge_url()
  url = 'http://www.ml-class.org/course/homework/challenge';
end

function url = submit_url()
  url = 'http://www.ml-class.org/course/homework/submit';
end

% ========================= CHALLENGE HELPERS =========================

function src = source(partId)
  % Concatenate the text of every source file belonging to partId,
  % appending the '||||||||' marker after each file as a separator.
  src = '';
  src_files = sources();
  if partId <= numel(src_files)
    flist = src_files{partId};
    for i = 1:numel(flist)
      fid = fopen(flist{i});
      while ~feof(fid)
        line = fgets(fid);
        src = [src line];
      end
      fclose(fid);
      src = [src '||||||||'];
    end
  end
end

function ret = isValidPartId(partId)
  % Valid choices are 1..numParts plus numParts+1 meaning "all parts".
  partNames = validParts();
  ret = (~isempty(partId)) && (partId >= 1) && (partId <= numel(partNames) + 1);
end

function partId = promptPart()
  % Interactively list the parts (with their source files) and read the
  % user's numeric choice; returns -1 on invalid input.
  fprintf('== Select which part(s) to submit:\n', ...
          homework_id());
  partNames = validParts();
  srcFiles = sources();
  for i = 1:numel(partNames)
    fprintf('== %d) %s [', i, partNames{i});
    fprintf(' %s ', srcFiles{i}{:});
    fprintf(']\n');
  end
  fprintf('== %d) All of the above \n==\nEnter your choice [1-%d]: ', ...
          numel(partNames) + 1, numel(partNames) + 1);
  selPart = input('', 's');
  partId = str2num(selPart);
  if ~isValidPartId(partId)
    partId = -1;
  end
end

function [email,ch,signature] = getChallenge(email)
  % Ask the server for a challenge; the reply is '|'-separated as
  % email|challenge|signature (on error, the message arrives in `email`).
  str = urlread(challenge_url(), 'post', {'email_address', email});

  str = strtrim(str);
  [email, str] = strtok (str, '|');
  [ch, str] = strtok (str, '|');
  [signature, str] = strtok (str, '|');
end

function [result, str] = submitSolution(email, ch_resp, part, output, ...
                                        source, signature)
  % POST the solution fields to the submit endpoint; str is the server's
  % textual reply. result is a placeholder (always 0) — the reply is not
  % parsed for success/failure here.

  params = {'homework', homework_id(), ...
            'part', num2str(part), ...
            'email', email, ...
            'output', output, ...
            'source', source, ...
            'challenge_response', ch_resp, ...
            'signature', signature};

  str = urlread(submit_url(), 'post', params);

  % Parse str to read for success / failure
  result = 0;

end

% =========================== LOGIN HELPERS ===========================

function [login password] = loginPrompt()
  % Prompt for password; both fields empty-out together so callers only
  % need to test `login`.
  [login password] = basicPrompt();

  if isempty(login) || isempty(password)
    login = []; password = [];
  end
end

function [login password] = basicPrompt()
  % Read login (email) and password from standard input.
  login = input('Login (Email address): ', 's');
  password = input('Password: ', 's');
end

function [str] = challengeResponse(email, passwd, challenge)
  % Answer the server challenge: SHA-1 of (challenge || SHA-1(salt+email+pw)),
  % then keep 16 hex digits at positions chosen by randperm (sorted).
  salt = ')~/|]QMB3[!W`?OVt7qC"@+}';
  str = sha1([challenge sha1([salt email passwd])]);
  sel = randperm(numel(str));
  sel = sort(sel(1:16));
  str = str(sel);
end

% =============================== SHA-1 ================================

function hash = sha1(str)
  % Pure-Octave SHA-1 of a byte string; returns a 40-char lowercase hex
  % digest. Follows the standard pad / 512-bit chunk / 80-round schedule.

  % Initialize variables
  h0 = uint32(1732584193);
  h1 = uint32(4023233417);
  h2 = uint32(2562383102);
  h3 = uint32(271733878);
  h4 = uint32(3285377520);

  % Convert to word array
  strlen = numel(str);

  % Break string into chars and append the bit 1 to the message
  mC = [double(str) 128];
  mC = [mC zeros(1, 4-mod(numel(mC), 4), 'uint8')];

  numB = strlen * 8;
  if exist('idivide')
    numC = idivide(uint32(numB + 65), 512, 'ceil');
  else
    numC = ceil(double(numB + 65)/512);
  end
  numW = numC * 16;
  mW = zeros(numW, 1, 'uint32');

  % Pack 4 bytes per big-endian 32-bit word.
  idx = 1;
  for i = 1:4:strlen + 1
    mW(idx) = bitor(bitor(bitor( ...
                  bitshift(uint32(mC(i)), 24), ...
                  bitshift(uint32(mC(i+1)), 16)), ...
                  bitshift(uint32(mC(i+2)), 8)), ...
                  uint32(mC(i+3)));
    idx = idx + 1;
  end

  % Append length of message (in bits, as two 32-bit words)
  mW(numW - 1) = uint32(bitshift(uint64(numB), -32));
  mW(numW) = uint32(bitshift(bitshift(uint64(numB), 32), -32));

  % Process the message in successive 512-bit chunks
  for cId = 1 : double(numC)
    cSt = (cId - 1) * 16 + 1;
    cEnd = cId * 16;
    ch = mW(cSt : cEnd);

    % Extend the sixteen 32-bit words into eighty 32-bit words
    for j = 17 : 80
      ch(j) = ch(j - 3);
      ch(j) = bitxor(ch(j), ch(j - 8));
      ch(j) = bitxor(ch(j), ch(j - 14));
      ch(j) = bitxor(ch(j), ch(j - 16));
      ch(j) = bitrotate(ch(j), 1);
    end

    % Initialize hash value for this chunk
    a = h0;
    b = h1;
    c = h2;
    d = h3;
    e = h4;

    % Main loop: 80 rounds with the 4 standard round functions/constants
    for i = 1 : 80
      if(i >= 1 && i <= 20)
        f = bitor(bitand(b, c), bitand(bitcmp(b), d));
        k = uint32(1518500249);
      elseif(i >= 21 && i <= 40)
        f = bitxor(bitxor(b, c), d);
        k = uint32(1859775393);
      elseif(i >= 41 && i <= 60)
        f = bitor(bitor(bitand(b, c), bitand(b, d)), bitand(c, d));
        k = uint32(2400959708);
      elseif(i >= 61 && i <= 80)
        f = bitxor(bitxor(b, c), d);
        k = uint32(3395469782);
      end

      t = bitrotate(a, 5);
      t = bitadd(t, f);
      t = bitadd(t, e);
      t = bitadd(t, k);
      t = bitadd(t, ch(i));
      e = d;
      d = c;
      c = bitrotate(b, 30);
      b = a;
      a = t;

    end
    h0 = bitadd(h0, a);
    h1 = bitadd(h1, b);
    h2 = bitadd(h2, c);
    h3 = bitadd(h3, d);
    h4 = bitadd(h4, e);

  end

  hash = reshape(dec2hex(double([h0 h1 h2 h3 h4]), 8)', [1 40]);

  hash = lower(hash);

end

function ret = bitadd(iA, iB)
  % Modular 2^32 addition: add in double precision, clear bit 33, re-cast.
  ret = double(iA) + double(iB);
  ret = bitset(ret, 33, 0);
  ret = uint32(ret);
end

function ret = bitrotate(iA, places)
  % Left-rotate a uint32 by `places` bits (shift-out bits wrap around).
  t = bitshift(iA, places - 32);
  ret = bitshift(iA, places);
  ret = bitor(ret, t);
end
|
||||
349
5/data science/r/12/mlclass-ex3/submitWeb.m
Normal file
349
5/data science/r/12/mlclass-ex3/submitWeb.m
Normal file
@ -0,0 +1,349 @@
|
||||
function submitWeb(partId)
%SUBMITWEB Generates a base64 encoded string for web-based submissions
%   SUBMITWEB() will generate a base64 encoded string so that you can submit
%   your solutions via a web form. The serialized submission is written to a
%   local file (or echoed to the console if the file cannot be opened).

  fprintf('==\n== [ml-class] Submitting Solutions | Programming Exercise %s\n==\n', ...
          homework_id());
  if ~exist('partId', 'var') || isempty(partId)
    partId = promptPart();
  end

  % Check valid partId (web submission handles one part at a time)
  partNames = validParts();
  if ~isValidPartId(partId)
    fprintf('!! Invalid homework part selected.\n');
    fprintf('!! Expected an integer from 1 to %d.\n', numel(partNames));
    fprintf('!! Submission Cancelled\n');
    return
  end

  [login] = loginPrompt();
  if isempty(login)
    fprintf('!! Submission Cancelled\n');
    return
  end

  % Serialize output + sources for this part, then base64-encode the blob.
  [result] = submitSolution(login, partId, output(partId), ...
                            source(partId));
  result = base64encode(result);

  fprintf('\nSave as submission file [submit_ex%s_part%d.txt]: ', ...
          homework_id(), partId);
  saveAsFile = input('', 's');
  if (isempty(saveAsFile))
    saveAsFile = sprintf('submit_ex%s_part%d.txt', homework_id(), partId);
  end

  fid = fopen(saveAsFile, 'w');
  if (fid)
    fwrite(fid, result);
    fclose(fid);
    fprintf('\nSaved your solutions to %s.\n\n', saveAsFile);
    fprintf(['You can now submit your solutions through the web \n' ...
             'form in the programming exercises. Select the corresponding \n' ...
             'programming exercise to access the form.\n']);
  else
    % Fallback: could not open the file — print the blob for manual saving.
    fprintf('Unable to save to %s\n\n', saveAsFile);
    fprintf(['You can create a submission file by saving the \n' ...
             'following text in a file: (press enter to continue)\n\n']);
    pause;
    fprintf(result);
  end

end
|
||||
|
||||
% ================== CONFIGURABLES FOR EACH HOMEWORK ==================
|
||||
|
||||
function id = homework_id()
% Identifier of this programming exercise, as the submission system expects.
id = '3';
end
|
||||
|
||||
function [partNames] = validParts()
% Human-readable names of the gradable parts, in submission order.
% (The trailing space in the first name is intentional and preserved.)
part1 = 'Vectorized Logistic Regression ';
part2 = 'One-vs-all classifier training';
part3 = 'One-vs-all classifier prediction';
part4 = 'Neural network prediction function';
partNames = {part1, part2, part3, part4};
end
|
||||
|
||||
function srcs = sources()
% Source files graded for each part: one cell of file names per part,
% in the same order as validParts().
srcs = {};
srcs{end + 1} = { 'lrCostFunction.m' };
srcs{end + 1} = { 'oneVsAll.m' };
srcs{end + 1} = { 'predictOneVsAll.m' };
srcs{end + 1} = { 'predict.m' };
end
|
||||
|
||||
function out = output(partId)
  % Build the graded output string for partId by running the student's
  % functions on fixed, deterministic test fixtures.

  % Random Test Cases
  X = [ones(20,1) (exp(1) * sin(1:1:20))' (exp(0.5) * cos(1:1:20))'];
  y = sin(X(:,1) + X(:,2)) > 0;
  Xm = [ -1 -1 ; -1 -2 ; -2 -1 ; -2 -2 ; ...
          1 1 ; 1 2 ; 2 1 ; 2 2 ; ...
         -1 1 ; -1 2 ; -2 1 ; -2 2 ; ...
          1 -1 ; 1 -2 ; -2 -1 ; -2 -2 ];
  ym = [ 1 1 1 1 2 2 2 2 3 3 3 3 4 4 4 4 ]';
  t1 = sin(reshape(1:2:24, 4, 3));
  t2 = cos(reshape(1:2:40, 4, 5));

  % Each branch formats the numeric results as space-separated %0.5f values.
  if partId == 1
    [J, grad] = lrCostFunction([0.25 0.5 -0.5]', X, y, 0.1);
    out = sprintf('%0.5f ', J);
    out = [out sprintf('%0.5f ', grad)];
  elseif partId == 2
    out = sprintf('%0.5f ', oneVsAll(Xm, ym, 4, 0.1));
  elseif partId == 3
    out = sprintf('%0.5f ', predictOneVsAll(t1, Xm));
  elseif partId == 4
    out = sprintf('%0.5f ', predict(t1, t2, Xm));
  end
end
|
||||
|
||||
|
||||
% ========================= SUBMIT HELPERS =========================
|
||||
|
||||
function src = source(partId)
  % Concatenate the text of every source file belonging to partId,
  % appending the '||||||||' marker after each file as a separator.
  src = '';
  src_files = sources();
  if partId <= numel(src_files)
    flist = src_files{partId};
    for i = 1:numel(flist)
      fid = fopen(flist{i});
      while ~feof(fid)
        line = fgets(fid);
        src = [src line];
      end
      fclose(fid);
      src = [src '||||||||'];
    end
  end
end
|
||||
|
||||
function ret = isValidPartId(partId)
% A part id is valid when it is non-empty and lies in 1..numel(validParts()).
% (The web submitter has no "all parts" option, hence no +1.)
numParts = numel(validParts());
ret = ~isempty(partId) && (partId >= 1) && (partId <= numParts);
end
|
||||
|
||||
function partId = promptPart()
  % Interactively list the parts (with their source files) and read the
  % user's numeric choice; returns -1 on invalid input.
  fprintf('== Select which part(s) to submit:\n', ...
          homework_id());
  partNames = validParts();
  srcFiles = sources();
  for i = 1:numel(partNames)
    fprintf('== %d) %s [', i, partNames{i});
    fprintf(' %s ', srcFiles{i}{:});
    fprintf(']\n');
  end
  fprintf('\nEnter your choice [1-%d]: ', ...
          numel(partNames));
  selPart = input('', 's');
  partId = str2num(selPart);
  if ~isValidPartId(partId)
    partId = -1;
  end
end
|
||||
|
||||
|
||||
% SUBMITSOLUTION  Serialize a submission as a PHP-style associative array of
%   five base64-encoded fields understood by the grading server.
%
%   BUG FIX: the declared second output `str` was never assigned, so any
%   caller requesting both outputs hit an "output argument not assigned"
%   error; it is now initialized (kept for interface compatibility).
function [result, str] = submitSolution(email, part, output, source)
  str = '';
  % 'a:5:{...}' is PHP serialize() syntax for a 5-entry array.
  result = ['a:5:{' ...
            p_s('homework') p_s64(homework_id()) ...
            p_s('part') p_s64(part) ...
            p_s('email') p_s64(email) ...
            p_s('output') p_s64(output) ...
            p_s('source') p_s64(source) ...
            '}'];
end
|
||||
|
||||
% P_S  PHP-serialize a plain string as  s:<length>:"<value>";
function s = p_s(str)
  s = sprintf('s:%d:"%s";', numel(str), str);
end
|
||||
|
||||
% P_S64  Base64-encode the input (no line breaks), then PHP-serialize the
% encoded text as  s:<length>:"<encoded>";
function s = p_s64(str)
  enc = base64encode(str, '');
  s = sprintf('s:%d:"%s";', numel(enc), enc);
end
|
||||
|
||||
% =========================== LOGIN HELPERS ===========================
|
||||
|
||||
% LOGINPROMPT  Ask for the student's login.
%   Thin wrapper around basicPrompt(), kept for interface compatibility with
%   older versions of this submit script that also prompted for a password.
function [login] = loginPrompt()
  login = basicPrompt();
end
|
||||
|
||||
|
||||
% BASICPROMPT  Read the student's e-mail address from stdin as plain text
% (the 's' flag returns the raw string without evaluating it).
function [login] = basicPrompt()
  login = input('Login (Email address): ', 's');
end
|
||||
|
||||
|
||||
% =========================== Base64 Encoder ============================
|
||||
% Thanks to Peter John Acklam
|
||||
%
|
||||
|
||||
function y = base64encode(x, eol)
%BASE64ENCODE Perform base64 encoding on a string.
%
%   BASE64ENCODE(STR, EOL) encode the given string STR.  EOL is the line ending
%   sequence to use; it is optional and defaults to '\n' (ASCII decimal 10).
%   The returned encoded string is broken into lines of no more than 76
%   characters each, and each line will end with EOL unless it is empty.  Let
%   EOL be empty if you do not want the encoded string broken into lines.
%
%   STR and EOL don't have to be strings (i.e., char arrays).  The only
%   requirement is that they are vectors containing values in the range 0-255.
%
%   This function may be used to encode strings into the Base64 encoding
%   specified in RFC 2045 - MIME (Multipurpose Internet Mail Extensions).  The
%   Base64 encoding is designed to represent arbitrary sequences of octets in a
%   form that need not be humanly readable.  A 65-character subset
%   ([A-Za-z0-9+/=]) of US-ASCII is used, enabling 6 bits to be represented per
%   printable character.
%
%   Examples
%   --------
%
%   If you want to encode a large file, you should encode it in chunks that are
%   a multiple of 57 bytes.  This ensures that the base64 lines line up and
%   that you do not end up with padding in the middle.  57 bytes of data fills
%   one complete base64 line (76 == 57*4/3):
%
%   If ifid and ofid are two file identifiers opened for reading and writing,
%   respectively, then you can base64 encode the data with
%
%      while ~feof(ifid)
%         fwrite(ofid, base64encode(fread(ifid, 60*57)));
%      end
%
%   or, if you have enough memory,
%
%      fwrite(ofid, base64encode(fread(ifid)));
%
%   See also BASE64DECODE.

%   Author:      Peter John Acklam
%   Time-stamp:  2004-02-03 21:36:56 +0100
%   E-mail:      pjacklam@online.no
%   URL:         http://home.online.no/~pjacklam

   % Numeric input is first converted to its textual representation
   % (so base64encode(42) encodes the characters '42', not the byte 42).
   if isnumeric(x)
      x = num2str(x);
   end

   % make sure we have the EOL value
   if nargin < 2
      eol = sprintf('\n');
   else
      if sum(size(eol) > 1) > 1
         error('EOL must be a vector.');
      end
      if any(eol(:) > 255)
         error('EOL can not contain values larger than 255.');
      end
   end

   if sum(size(x) > 1) > 1
      error('STR must be a vector.');
   end

   x   = uint8(x);
   eol = uint8(eol);

   ndbytes = length(x);                 % number of decoded bytes
   nchunks = ceil(ndbytes / 3);         % number of chunks/groups
   nebytes = 4 * nchunks;               % number of encoded bytes

   % add padding if necessary, to make the length of x a multiple of 3
   if rem(ndbytes, 3)
      x(end+1 : 3*nchunks) = 0;
   end

   x = reshape(x, [3, nchunks]);        % reshape the data
   y = repmat(uint8(0), 4, nchunks);    % for the encoded data

   %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   % Split up every 3 bytes into 4 pieces
   %
   %    aaaaaabb bbbbcccc ccdddddd
   %
   % to form
   %
   %    00aaaaaa 00bbbbbb 00cccccc 00dddddd
   %
   y(1,:) = bitshift(x(1,:), -2);                  % 6 highest bits of x(1,:)

   y(2,:) = bitshift(bitand(x(1,:), 3), 4);        % 2 lowest bits of x(1,:)
   y(2,:) = bitor(y(2,:), bitshift(x(2,:), -4));   % 4 highest bits of x(2,:)

   y(3,:) = bitshift(bitand(x(2,:), 15), 2);       % 4 lowest bits of x(2,:)
   y(3,:) = bitor(y(3,:), bitshift(x(3,:), -6));   % 2 highest bits of x(3,:)

   y(4,:) = bitand(x(3,:), 63);                    % 6 lowest bits of x(3,:)

   %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   % Now perform the following mapping
   %
   %   0  - 25  ->  A-Z
   %   26 - 51  ->  a-z
   %   52 - 61  ->  0-9
   %   62       ->  +
   %   63       ->  /
   %
   % We could use a mapping vector like
   %
   %   ['A':'Z', 'a':'z', '0':'9', '+/']
   %
   % but that would require an index vector of class double.
   %
   z = repmat(uint8(0), size(y));
   i = y <= 25;            z(i) = 'A'      + double(y(i));
   i = 26 <= y & y <= 51;  z(i) = 'a' - 26 + double(y(i));
   i = 52 <= y & y <= 61;  z(i) = '0' - 52 + double(y(i));
   i = y == 62;            z(i) = '+';
   i = y == 63;            z(i) = '/';
   y = z;

   %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   % Add padding if necessary.
   %
   npbytes = 3 * nchunks - ndbytes;     % number of padding bytes
   if npbytes
      y(end-npbytes+1 : end) = '=';     % '=' is used for padding
   end

   if isempty(eol)

      % reshape to a row vector
      y = reshape(y, [1, nebytes]);

   else

      nlines = ceil(nebytes / 76);      % number of lines
      neolbytes = length(eol);          % number of bytes in eol string

      % pad data so it becomes a multiple of 76 elements
      % NOTE(review): the zeros() concatenation already pads y to
      % 76*nlines elements, so the following assignment is redundant
      % but harmless.
      y = [y(:) ; zeros(76 * nlines - numel(y), 1)];
      y(nebytes + 1 : 76 * nlines) = 0;
      y = reshape(y, 76, nlines);

      % insert eol strings (one copy appended below each 76-byte column)
      eol = eol(:);
      y(end + 1 : end + neolbytes, :) = eol(:, ones(1, nlines));

      % remove padding, but keep the last eol string
      m = nebytes + neolbytes * (nlines - 1);
      n = (76+neolbytes)*nlines - neolbytes;
      y(m+1 : n) = '';

      % extract and reshape to row vector
      y = reshape(y, 1, m+neolbytes);

   end

   % output is a character array
   y = char(y);

end
|
||||
61
5/data science/r/2.rmd
Normal file
61
5/data science/r/2.rmd
Normal file
@ -0,0 +1,61 @@
|
||||
---
|
||||
title: "Lab2: Introduction to R, exploring the box-plot"
|
||||
author: "Vladislav Litvinov <vlad@sek1ro>"
|
||||
output:
  pdf_document:
    toc: true
|
||||
---
|
||||
# Data preparation
|
||||
```{r}
|
||||
# Load the pipe-separated income data and give the columns readable names.
df <- read.csv("./zipIncome.txt", sep = "|")
colnames(df) <- c("zipCode", "income")

# First look: with NA present, mean() returns NA, motivating the cleanup below.
summary(df)
mean(df$income)

# Drop the rows whose income is missing.
if (any(is.na(df$income))) {
  df <- df[!is.na(df$income), ]
}

# Central tendency after cleaning.
mean(df$income)
median(df$income, na.rm = TRUE)
|
||||
```
|
||||
# Histograms and box-plot "whiskers"
|
||||
```{r}
|
||||
# Scatter of raw income per ZIP-prefix group.
# BUG FIX: xlab/ylab were swapped (x is zipCode, y is income).
plot(y = df$income, x = df$zipCode, xlab = "zipCode", ylab = "income")

# Incomes are heavy-tailed; work on a log10 scale.
df$incomelog = log10(df$income)
hist(df$incomelog, breaks = 80)

# Manually chosen trimming bounds (7k .. 200k), expressed on the log scale.
print(min_incomelog <- log10(7e3))
print(max_incomelog <- log10(2e5))
print(avg_incomelog <- median(df$incomelog))

# Trim the tails and look at the distribution again.
df <- subset(df, 7e3 < df$income & df$income < 2e5)
hist(df$incomelog, breaks = 80)
summary(df)

boxplot(incomelog ~ zipCode, data = df, main = "boxplot", xlab = "zipCode", ylab = "incomelog")

library(ggplot2)

# Jittered scatter over a per-ZIP box-plot, income on a log axis.
ggplot(df, aes(x = zipCode, y = income, color = zipCode)) +
  geom_point(
    position = position_jitter(width = 0.2),
    alpha = 0.2,
  ) +
  geom_boxplot(
    alpha = 0.5,
    outlier.shape = NA,
    width = 0.6,
    fill = "white",
    color = "black"
  ) +
  scale_y_log10(
    breaks = c(1e4, 25e3, 5e4, 1e5, 2e5, 5e5)
  ) +
  labs(
    title = "Income distribution by ZIP codes",
    subtitle = "Scatter plot jitter",
  ) +
  theme_minimal()
|
||||
|
||||
```
|
||||
158
5/data science/r/4.rmd
Normal file
158
5/data science/r/4.rmd
Normal file
@ -0,0 +1,158 @@
|
||||
---
|
||||
title: "Lab4: K-means, DB-scan and dendrograms"
|
||||
author: "Vladislav Litvinov <vlad@sek1ro>"
|
||||
output:
  pdf_document:
    toc: true
|
||||
---
|
||||
# Data preparation
|
||||
```{r}
|
||||
# Working directory of the lab data (knitr normally manages this itself).
setwd("/home/sek1ro/git/public/lab/ds/25-1/r")

# Load per-state income/electricity data and keep a log10-income column.
load("./income_elec_state.Rdata")
df <- income_elec_state
df$incomelog <- log10(df$income)
rm(income_elec_state)
|
||||
```
|
||||
# Function to compute Within-Cluster Sum of Squares for choosing optimal K
|
||||
```{r}
|
||||
# Elbow heuristic for choosing k: fit k-means for k = 1..10 on the first two
# columns, record the total within-cluster sum of squares, plot the curve,
# and return the k where consecutive WSS drops shrink the most (smallest
# ratio of successive drops).
# NOTE(review): kmeans() starts from random centers, so the returned k can
# vary between runs unless the caller fixes the RNG with set.seed().
elbow_wss = function(df) {
  max_k = 10
  wss = numeric(max_k)

  for (k in seq_len(max_k)) {
    fit = kmeans(df[, 1:2], centers = k)
    wss[k] = fit$tot.withinss
  }

  plot(seq_len(max_k), wss, type = "b")

  drops = diff(wss)
  drop_ratio = drops[-1] / drops[-length(drops)]
  which.min(drop_ratio)
}
|
||||
```
|
||||
# Scatter-plot: elec vs income and log-income
|
||||
```{r}
|
||||
library(ggplot2)

# Fit k-means with k clusters on `data` (columns income, elec) and draw the
# points, their state labels, and the cluster centers (triangles).  With
# log = TRUE both axes use a log10 scale.
plot_kmeans = function(data, k, log) {
  res = kmeans(data, centers = k)
  centers = as.data.frame(res$centers)
  centers$cluster = as.factor(1:k)
  data$cluster = as.factor(res$cluster)
  # BUG FIX: labels used to be taken from the *global* `df` (rownames(df)),
  # which silently breaks if `data` is not a row-aligned slice of df; use the
  # rownames of the frame actually being plotted.
  data$state = rownames(data)

  plt = ggplot() +
    geom_point(
      data = data,
      aes(x = income, y = elec, color = cluster)
    ) +
    geom_text(
      data = data,
      vjust = 1.5,
      size = 2,
      aes(x = income, y = elec, label = state)
    ) +
    geom_point(
      data = centers,
      shape = 17,
      size = 5,
      aes(x = income, y = elec, color = cluster)
    ) +
    theme_minimal()

  if (log) {
    plt = plt + scale_x_log10() + scale_y_log10()
  }

  print(plt)
}

data = df[, c("income", "elec")]
datalog = df[, c("incomelog", "elec")]
k = elbow_wss(data)
klog = elbow_wss(datalog)
plot_kmeans(data, k, log = FALSE)
# NOTE(review): klog was tuned on `datalog` but applied to `data` here
# (plot_kmeans hard-codes the income/elec columns) -- confirm this is the
# intended comparison.
plot_kmeans(data, klog, log = TRUE)
|
||||
|
||||
```
|
||||
# Map of USA
|
||||
```{r}
|
||||
library(maps)

# Color each state polygon by its k-means cluster.
# NOTE(review): map("state") draws polygons in its own internal order;
# matching clusters to polygons by alphabetically sorted rownames is
# approximate -- verify the alignment before interpreting the map.
res = kmeans(data, centers = k)
map_color = res$cluster[order(names(res$cluster))]
map("state", fill = TRUE, col = map_color)

# Remove `elec` outliers with the 1.5 * IQR rule.
# FIX: the original locals were named Q1/Q3/IQR/min/max, shadowing
# stats::IQR, base::min and base::max for the rest of the chunk.
q1 = quantile(df$elec, 0.25)
q3 = quantile(df$elec, 0.75)
iqr = q3 - q1
lower = q1 - 1.5 * iqr
upper = q3 + 1.5 * iqr
df = subset(df, df$elec > lower & df$elec < upper)

# Re-cluster and re-plot on the trimmed data.
data = df[, c("income", "elec")]
datalog = df[, c("incomelog", "elec")]
k = elbow_wss(data)
klog = elbow_wss(datalog)
plot_kmeans(data, k, log = FALSE)
plot_kmeans(data, klog, log = TRUE)
|
||||
|
||||
```
|
||||
# Differences within dendrogramm algo: 'single', 'complete', 'ward.D', 'average'
|
||||
```{r}
|
||||
|
||||
library(ggdendro)

# Cut a hierarchical clustering of (income, elec) into k groups and plot the
# labelled scatter colored by cluster membership.
plot_hclust = function(df, linkage, k) {
  data = df[, c("income", "elec")]
  distance = dist(data, method = "euclidean")
  clust = hclust(distance, method = linkage)
  data$cluster = as.factor(cutree(clust, k = k))
  data$state = rownames(df)
  print(cutree(clust, k = k))
  print(data)
  plt = ggplot() +
    geom_point(
      data = data,
      aes(x = income, y = elec, color = cluster)
    ) +
    geom_text(
      data = data,
      vjust = 1.5,
      size = 2,
      aes(x = income, y = elec, label = state)
    ) +
    # BUG FIX: theme_minimal() sat on its own statement without a `+`, so its
    # result was evaluated and silently discarded; it is now part of the plot.
    theme_minimal()
  print(plt)
}

plot_hclust(data, "average", 5)

# Single-linkage dendrogram of the same distance matrix.
distance = dist(data, method = "euclidean")
clust = hclust(distance, method = "single")
plot(ggdendrogram(clust))

cutree(clust, k = 3)
|
||||
```
|
||||
103
5/data science/r/5.rmd
Normal file
103
5/data science/r/5.rmd
Normal file
@ -0,0 +1,103 @@
|
||||
---
|
||||
title: "Lab5: Associative rules, Apriori"
|
||||
author: "Vladislav Litvinov <vlad@sek1ro>"
|
||||
output:
  pdf_document:
    toc: true
|
||||
---
|
||||
# Histogram of transaction frequencies
|
||||
```{r}
|
||||
setwd("/home/sek1ro/git/public/lab/ds/25-1/r")
library(arules)
library(arulesViz)

# Space-separated basket file; rm.duplicates collapses repeated items
# inside a single transaction.
ts = read.transactions("AssociationRules.csv",
                       sep = " ",
                       rm.duplicates = TRUE)

# Ten most frequent items by absolute count, plus the overall winner and the
# largest basket size.
itemFrequencyPlot(ts, type = "absolute", topN = 10)
ift = sort(itemFrequency(ts), decreasing = TRUE)

(most_frequent_item = ift[1])
(max_ts_size = max(size(ts)))
|
||||
```
|
||||
# Model training. Rules computing
|
||||
```{r}
|
||||
# Mine rules at 1% support: first with no confidence floor, then with a 50%
# floor, and compare the rule counts and support/confidence scatters.
rules = apriori(ts, parameter = list(support = 0.01, confidence = 0))
length(rules)
plot(rules, jitter = 0)

rules50 = apriori(ts, parameter = list(support = 0.01, confidence = 0.5))
length(rules50)
plot(rules50, jitter = 0)
|
||||
```
|
||||
# Manual threshold applying: confidence = 0.5, plots comparsion
|
||||
```{r}
|
||||
library(ggplot2)

# Support/confidence scatter of rule-quality rows, shaded by lift.
# Rows are drawn in ascending lift order so high-lift points end up on top.
asc = function(q, colors = c("lightgray", "red")) {
  ordered_q = q[order(q$lift), ]
  lift_range = c(min(ordered_q$lift), max(ordered_q$lift))
  p = ggplot(ordered_q, aes(x = support, y = confidence, color = lift))
  p = p + geom_point()
  p = p + ylim(0, 1) + xlim(0, 0.5)
  p = p + theme_minimal()
  p + scale_color_gradientn(
    colors = colors,
    name = "Lift",
    limits = lift_range
  )
}

# Rules mined with the 0.5 confidence floor ...
quality50 = as.data.frame(quality(rules50))
asc(quality50, colors = c("navy", "cyan"))

# ... versus the unconstrained rules filtered to confidence > 0.5 afterwards.
quality = as.data.frame(quality(rules))
asc(subset(quality, quality$confidence > 0.5))
|
||||
```
|
||||
# Top 3 rules by lift. 'Relation' graph and matrix
|
||||
```{r}
|
||||
# Scatter views of all mined rules.
plot(rules, measure = c("support", "lift"), shading = "confidence")
plot(rules)

# Keep high-confidence rules and rank them by lift (descending).
filt_rules = rules[which(quality(rules)$confidence > 0.8)]
quality = as.data.frame(quality(filt_rules))
quality = quality[order(-quality$lift), ]
# BUG FIX: the frame is sorted by *descending* lift, so the top rules sit at
# the head; tail() was showing the 10 lowest-lift rules in a section about
# the top rules.
head(quality, 10)

# Lift/confidence matrix view of the filtered rules.
plot(filt_rules,
     method = "matrix",
     shading = c("lift", "confidence"),
     engine = "grid")

# Graph of the three highest-lift rules.
top3_rules = head(sort(filt_rules, by = "lift", decreasing = TRUE), 3)
plot(top3_rules, method = "graph")
|
||||
```
|
||||
# Random picking of train and test datasets
|
||||
```{r}
|
||||
# First 8000 baskets for training, the remaining 2000 held out for testing.
# NOTE(review): despite the section title, this split is sequential, not
# random -- confirm the transaction file carries no meaningful ordering.
train_set = ts[1:8000]
test_set = ts[8001:10000]

# Mine rules on the training set, then re-measure the same rules against the
# held-out transactions.
train_rules = apriori(train_set, parameter = list(support = 0.01, confidence = 0.5))
test_quality = interestMeasure(train_rules,
                               measure = c("support", "confidence", "lift", "coverage"),
                               transactions = test_set)
# Side-by-side train/test metrics for the first ten rules.
comparison <- data.frame(
  train_support = quality(train_rules)$support[1:10],
  test_support = test_quality$support[1:10],
  train_lift = quality(train_rules)$lift[1:10],
  test_lift = test_quality$lift[1:10]
)

print(comparison)
# Plot for lift: points near the dashed diagonal generalize well.
plot(comparison$train_lift, comparison$test_lift,
     xlab = "train lift",
     ylab = "test lift",
     pch = 19)
abline(0, 1, lty = 2)

# Plot for support
plot(comparison$train_support, comparison$test_support,
     xlab = "train support",
     ylab = "test support",
     pch = 19)
abline(0, 1, lty = 2)
|
||||
```
|
||||
97
5/data science/r/6.rmd
Normal file
97
5/data science/r/6.rmd
Normal file
@ -0,0 +1,97 @@
|
||||
---
|
||||
title: "Lab6: Linear regression"
|
||||
author: "Vladislav Litvinov <vlad@sek1ro>"
|
||||
output:
  pdf_document:
    toc: true
|
||||
---
|
||||
# Data preparation
|
||||
```{r}
|
||||
setwd("/home/sek1ro/git/public/lab/ds/25-1/r")

# Load census rows, keep female records only, and drop id-like columns.
df <- read.csv("zeta.csv")
head(df)
drop_cols <- which(names(df) %in% c("X", "zcta", "sex"))
df <- df[df$sex == "F", -drop_cols]

# Keep only rows with plausible values for every predictor.
df <- subset(df, 8 < meaneducation & meaneducation < 18 &
                 10000 < meanhouseholdincome & meanhouseholdincome < 200000 &
                 0 < meanemployment & meanemployment < 3 &
                 20 < meanage & meanage < 60)

# Free-standing response vector used by the regressions below
# (ggplot resolves it from the calling environment, not from df).
log_income <- log10(df$meanhouseholdincome)
colnames(df) <- c("age", "education", "employment", "householdincome")
|
||||
```
|
||||
# Linear regression graph
|
||||
```{r}
|
||||
library(ggplot2)

# Log-income versus age, with an OLS fit overlaid.
p_age <- ggplot(df, aes(x = age, y = log_income)) +
  geom_point() +
  geom_smooth(method = "lm") +
  theme_minimal()
p_age

# Coefficients, t-statistics and R^2 for the same fit.
lmmod = lm(log_income ~ age, data = df)
summary(lmmod)
|
||||
```
|
||||
|
||||
# Formulas for Student's t-test and p-value
|
||||
> $\bar{x} = \frac{1}{n}\sum_i x_i$
> $s = \sqrt{\frac{\sum_i (x_i - \bar{x})^2}{n - 1}}$
> $SE = s / \sqrt{n}$
> $t = (\bar{x}_1 - \bar{x}_2) / \sqrt{SE_1^2 + SE_2^2}$
> $df = (n_1 + n_2) - 2$
> p = 0.03: the probability of obtaining test results at least as extreme as the result actually observed
|
||||
|
||||
```{r}
|
||||
# Education as the single predictor of log-income.
ggplot(df, aes(x = education, y = log_income)) +
  geom_point() +
  geom_smooth(method = "lm") +
  theme_minimal()

lmmod = lm(log_income ~ education, data = df)
summary(lmmod)

# Employment as the single predictor of log-income.
ggplot(df, aes(x = employment, y = log_income)) +
  geom_point() +
  geom_smooth(method = "lm") +
  theme_minimal()

lmmod = lm(log_income ~ employment, data = df)
summary(lmmod)

# All three predictors together, against the raw (non-log) household income.
lmmod = lm(householdincome ~ age + education + employment, data = df)
summary(lmmod)
|
||||
```
|
||||
# Random test and train datasets slices
|
||||
```{r}
|
||||
# Reproducible 5000-row hold-out split.
# BUG FIX: set.seed(Sys.Date()) seeded the RNG with the current date, so the
# "random" split -- and every downstream number in the report -- changed from
# day to day; a fixed integer seed makes the knit reproducible.
set.seed(1337)
test_idx = sample(seq_len(nrow(df)), 5000, replace = FALSE)
test_df = df[test_idx, ]
train_df = df[-test_idx, ]

# Fit on the training rows, predict income for the held-out rows.
lmmod = lm(householdincome ~ age + education + employment, data = train_df)
test_df$p_income = predict(lmmod, newdata = test_df)

# Predicted vs. actual; the identity line marks perfect prediction.
ggplot(test_df, aes(x = p_income, y = householdincome)) +
  geom_abline(intercept = 0, slope = 1) +
  geom_point() +
  theme_minimal()
|
||||
```
|
||||
# Error measurements: MSE, RMSE, MAE, Bias
|
||||
```{r}
|
||||
# Evaluate prediction error within ten equal-size bands of predicted income,
# printing RMSE, MAE and signed bias for each band.
test_df = test_df[order(test_df$p_income), ]
slice_n = 10
slice_size = floor(nrow(test_df) / slice_n)
for (i in 0 : (slice_n - 1)) {
  start_idx = slice_size * i + 1
  # The last slice absorbs the remainder rows left over by floor().
  end_idx = ifelse(i == slice_n - 1, nrow(test_df), slice_size * (i + 1))
  slice = test_df[start_idx : end_idx,]

  # Signed error: positive means over-prediction.
  error = slice$p_income - slice$householdincome
  rmse = sqrt(mean(error ^ 2))
  mae = mean(abs(error))
  bias = mean(error)
  cat(sprintf("range: %.2f-%.2f, RMSE: %.2f, MAE: %.2f, Bias: %.2f\n",
      test_df[start_idx, ]$p_income,
      test_df[end_idx, ]$p_income,
      rmse, mae, bias)
  )
}
|
||||
```
|
||||
BIN
5/data science/r/6/ex1.pdf
Normal file
BIN
5/data science/r/6/ex1.pdf
Normal file
Binary file not shown.
26
5/data science/r/6/mlclass-ex1/computeCost.m
Normal file
26
5/data science/r/6/mlclass-ex1/computeCost.m
Normal file
@ -0,0 +1,26 @@
|
||||
function J = computeCost(X, y, theta)
%COMPUTECOST Compute cost for linear regression
%   J = COMPUTECOST(X, y, theta) computes the cost of using theta as the
%   parameter for linear regression to fit the data points in X and y.
%
%   X     : (m x 2) design matrix (first column of ones)
%   y     : (m x 1) targets
%   theta : (2 x 1) parameters
%   J     : (1/(2m)) * sum of squared residuals

% Initialize some useful values
m = length(y);              % number of training examples

% Vectorized cost: residual' * residual equals sum(residual .^ 2).
residual = X * theta - y;   % per-example prediction error
J = (residual' * residual) / (2 * m);

end
|
||||
23
5/data science/r/6/mlclass-ex1/computeCostMulti.m
Normal file
23
5/data science/r/6/mlclass-ex1/computeCostMulti.m
Normal file
@ -0,0 +1,23 @@
|
||||
function J = computeCostMulti(X, y, theta)
%COMPUTECOSTMULTI Compute cost for linear regression with multiple variables
%   J = COMPUTECOSTMULTI(X, y, theta) computes the cost of using theta as the
%   parameter for linear regression to fit the data points in X and y.
%
%   X     : (m x n+1) design matrix (first column of ones)
%   y     : (m x 1) targets
%   theta : (n+1 x 1) parameters
%   J     : (1/(2m)) * sum of squared residuals

% Initialize some useful values
m = length(y);              % number of training examples

% Vectorized cost: residual' * residual equals sum(residual .^ 2).
residual = X * theta - y;   % per-example prediction error
J = (residual' * residual) / (2 * m);

end
|
||||
122
5/data science/r/6/mlclass-ex1/ex1.m
Normal file
122
5/data science/r/6/mlclass-ex1/ex1.m
Normal file
@ -0,0 +1,122 @@
|
||||
%% Machine Learning Online Class - Exercise 1: Linear Regression

%  Instructions
%  ------------
%
%  This file contains code that helps you get started on the
%  linear exercise. You will need to complete the following functions
%  in this exercise:
%
%     warmUpExercise.m
%     plotData.m
%     gradientDescent.m
%     computeCost.m
%     gradientDescentMulti.m
%     computeCostMulti.m
%     featureNormalize.m
%     normalEqn.m
%
%  For this exercise, you will not need to change any code in this file,
%  or any other files other than those mentioned above.
%
%  x refers to the population size in 10,000s
%  y refers to the profit in $10,000s
%

%% Initialization
clear all; close all; clc

%% ==================== Part 1: Basic Function ====================
% Complete warmUpExercise.m
fprintf('Running warmUpExercise ... \n');
fprintf('5x5 Identity Matrix: \n');
warmUpExercise()

fprintf('Program paused. Press enter to continue.\n');
pause;


%% ======================= Part 2: Plotting =======================
fprintf('Plotting Data ...\n')
data = load('ex1data1.txt');
X = data(:, 1); y = data(:, 2);
m = length(y); % number of training examples

% Plot Data
% Note: You have to complete the code in plotData.m
plotData(X, y);

fprintf('Program paused. Press enter to continue.\n');
pause;

%% =================== Part 3: Gradient descent ===================
fprintf('Running Gradient Descent ...\n')

X = [ones(m, 1), data(:,1)]; % Add a column of ones to x
theta = zeros(2, 1); % initialize fitting parameters

% Some gradient descent settings
iterations = 1500;
alpha = 0.01;

% compute and display initial cost (expected ~32.07 for this dataset)
computeCost(X, y, theta)

% run gradient descent
theta = gradientDescent(X, y, theta, alpha, iterations);

% print theta to screen
fprintf('Theta found by gradient descent: ');
fprintf('%f %f \n', theta(1), theta(2));

% Plot the linear fit
hold on; % keep previous plot visible
plot(X(:,2), X*theta, '-')
legend('Training data', 'Linear regression')
hold off % don't overlay any more plots on this figure

% Predict values for population sizes of 35,000 and 70,000
% (features are in 10,000s, hence 3.5 and 7; profits scale back by 10,000)
predict1 = [1, 3.5] *theta;
fprintf('For population = 35,000, we predict a profit of %f\n',...
    predict1*10000);
predict2 = [1, 7] * theta;
fprintf('For population = 70,000, we predict a profit of %f\n',...
    predict2*10000);

fprintf('Program paused. Press enter to continue.\n');
pause;

%% ============= Part 4: Visualizing J(theta_0, theta_1) =============
fprintf('Visualizing J(theta_0, theta_1) ...\n')

% Grid over which we will calculate J
theta0_vals = linspace(-10, 10, 100);
theta1_vals = linspace(-1, 4, 100);

% initialize J_vals to a matrix of 0's
J_vals = zeros(length(theta0_vals), length(theta1_vals));

% Fill out J_vals by evaluating the cost at every grid point
for i = 1:length(theta0_vals)
    for j = 1:length(theta1_vals)
        t = [theta0_vals(i); theta1_vals(j)];
        J_vals(i,j) = computeCost(X, y, t);
    end
end


% Because of the way meshgrids work in the surf command, we need to
% transpose J_vals before calling surf, or else the axes will be flipped
J_vals = J_vals';
% Surface plot
figure;
surf(theta0_vals, theta1_vals, J_vals)
xlabel('\theta_0'); ylabel('\theta_1');

% Contour plot
figure;
% Plot J_vals as 20 contours spaced logarithmically between 0.01 and 1000
contour(theta0_vals, theta1_vals, J_vals, logspace(-2, 3, 20))
xlabel('\theta_0'); ylabel('\theta_1');
hold on;
% Mark the gradient-descent solution on the contour plot
plot(theta(1), theta(2), 'rx', 'MarkerSize', 10, 'LineWidth', 2);
|
||||
167
5/data science/r/6/mlclass-ex1/ex1_multi.m
Normal file
167
5/data science/r/6/mlclass-ex1/ex1_multi.m
Normal file
@ -0,0 +1,167 @@
|
||||
%% Machine Learning Online Class
%  Exercise 1: Linear regression with multiple variables
%
%  Instructions
%  ------------
%
%  This file contains code that helps you get started on the
%  linear regression exercise.
%
%  You will need to complete the following functions in this
%  exercise:
%
%     warmUpExercise.m
%     plotData.m
%     gradientDescent.m
%     computeCost.m
%     gradientDescentMulti.m
%     computeCostMulti.m
%     featureNormalize.m
%     normalEqn.m
%
%  For this part of the exercise, you will need to change some
%  parts of the code below for various experiments (e.g., changing
%  learning rates).
%

%% Initialization

%% ================ Part 1: Feature Normalization ================

%% Clear and Close Figures
clear all; close all; clc

fprintf('Loading data ...\n');

%% Load Data
% Columns: living area (sq-ft), number of bedrooms, price.
data = load('ex1data2.txt');
X = data(:, 1:2);
y = data(:, 3);
m = length(y);

% Print out some data points
fprintf('First 10 examples from the dataset: \n');
fprintf(' x = [%.0f %.0f], y = %.0f \n', [X(1:10,:) y(1:10,:)]');

fprintf('Program paused. Press enter to continue.\n');
pause;

% Scale features and set them to zero mean
fprintf('Normalizing Features ...\n');

% mu and sigma are kept so predictions can normalize new inputs identically.
[X mu sigma] = featureNormalize(X);
fprintf('mu\n');
fprintf(' %f \n', mu);

fprintf('sigma\n');
fprintf(' %f \n', sigma);

% Add intercept term to X
X = [ones(m, 1) X];

fprintf('Normalized X\n');
fprintf(' [%f, %f, %f] \n', X(1:10,:)');

%% ================ Part 2: Gradient Descent ================

% ====================== YOUR CODE HERE ======================
% Instructions: We have provided you with the following starter
%               code that runs gradient descent with a particular
%               learning rate (alpha).
%
%               Your task is to first make sure that your functions -
%               computeCost and gradientDescent already work with
%               this starter code and support multiple variables.
%
%               After that, try running gradient descent with
%               different values of alpha and see which one gives
%               you the best result.
%
%               Finally, you should complete the code at the end
%               to predict the price of a 1650 sq-ft, 3 br house.
%
% Hint: By using the 'hold on' command, you can plot multiple
%       graphs on the same figure.
%
% Hint: At prediction, make sure you do the same feature normalization.
%

fprintf('Running gradient descent ...\n');

% Choose some alpha value
alpha = 0.01;
num_iters = 50;

% Init Theta and Run Gradient Descent
theta = zeros(3, 1);
[theta, J_history] = gradientDescentMulti(X, y, theta, alpha, num_iters);

% Plot the convergence graph
figure;
plot(1:numel(J_history), J_history, '-b', 'LineWidth', 2);
xlabel('Number of iterations');
ylabel('Cost J');

% Display gradient descent's result
fprintf('Theta computed from gradient descent: \n');
fprintf(' %f \n', theta);
fprintf('\n');

% Estimate the price of a 1650 sq-ft, 3 br house
% ====================== YOUR CODE HERE ======================
% Recall that the first column of X is all-ones. Thus, it does
% not need to be normalized.
% Normalize the raw query with the training-set mu/sigma before prediction.
normed_row = ([1650, 3] - mu) ./ sigma;
xrow = [1, normed_row];
price = xrow * theta;

% ============================================================

fprintf(['Predicted price of a 1650 sq-ft, 3 br house ' ...
         '(using gradient descent):\n $%f\n'], price);

fprintf('Program paused. Press enter to continue.\n');
pause;

%% ================ Part 3: Normal Equations ================

fprintf('Solving with normal equations...\n');

% ====================== YOUR CODE HERE ======================
% Instructions: The following code computes the closed form
%               solution for linear regression using the normal
%               equations. You should complete the code in
%               normalEqn.m
%
%               After doing so, you should complete this code
%               to predict the price of a 1650 sq-ft, 3 br house.
%

%% Load Data
data = csvread('ex1data2.txt');
X = data(:, 1:2);
y = data(:, 3);
m = length(y);

% Add intercept term to X
X = [ones(m, 1) X];

% Calculate the parameters from the normal equation
theta = normalEqn(X, y);

% Display normal equation's result
fprintf('Theta computed from the normal equations: \n');
fprintf(' %f \n', theta);
fprintf('\n');


% Estimate the price of a 1650 sq-ft, 3 br house
% ====================== YOUR CODE HERE ======================
% Raw (unnormalized) features are correct here: the normal-equation fit
% above was computed on the unnormalized X.
price = [1, 1650, 3] * theta; % You should change this

% ============================================================

fprintf(['Predicted price of a 1650 sq-ft, 3 br house ' ...
         '(using normal equations):\n $%f\n'], price);
|
||||
|
||||
97
5/data science/r/6/mlclass-ex1/ex1data1.txt
Normal file
97
5/data science/r/6/mlclass-ex1/ex1data1.txt
Normal file
@ -0,0 +1,97 @@
|
||||
6.1101,17.592
|
||||
5.5277,9.1302
|
||||
8.5186,13.662
|
||||
7.0032,11.854
|
||||
5.8598,6.8233
|
||||
8.3829,11.886
|
||||
7.4764,4.3483
|
||||
8.5781,12
|
||||
6.4862,6.5987
|
||||
5.0546,3.8166
|
||||
5.7107,3.2522
|
||||
14.164,15.505
|
||||
5.734,3.1551
|
||||
8.4084,7.2258
|
||||
5.6407,0.71618
|
||||
5.3794,3.5129
|
||||
6.3654,5.3048
|
||||
5.1301,0.56077
|
||||
6.4296,3.6518
|
||||
7.0708,5.3893
|
||||
6.1891,3.1386
|
||||
20.27,21.767
|
||||
5.4901,4.263
|
||||
6.3261,5.1875
|
||||
5.5649,3.0825
|
||||
18.945,22.638
|
||||
12.828,13.501
|
||||
10.957,7.0467
|
||||
13.176,14.692
|
||||
22.203,24.147
|
||||
5.2524,-1.22
|
||||
6.5894,5.9966
|
||||
9.2482,12.134
|
||||
5.8918,1.8495
|
||||
8.2111,6.5426
|
||||
7.9334,4.5623
|
||||
8.0959,4.1164
|
||||
5.6063,3.3928
|
||||
12.836,10.117
|
||||
6.3534,5.4974
|
||||
5.4069,0.55657
|
||||
6.8825,3.9115
|
||||
11.708,5.3854
|
||||
5.7737,2.4406
|
||||
7.8247,6.7318
|
||||
7.0931,1.0463
|
||||
5.0702,5.1337
|
||||
5.8014,1.844
|
||||
11.7,8.0043
|
||||
5.5416,1.0179
|
||||
7.5402,6.7504
|
||||
5.3077,1.8396
|
||||
7.4239,4.2885
|
||||
7.6031,4.9981
|
||||
6.3328,1.4233
|
||||
6.3589,-1.4211
|
||||
6.2742,2.4756
|
||||
5.6397,4.6042
|
||||
9.3102,3.9624
|
||||
9.4536,5.4141
|
||||
8.8254,5.1694
|
||||
5.1793,-0.74279
|
||||
21.279,17.929
|
||||
14.908,12.054
|
||||
18.959,17.054
|
||||
7.2182,4.8852
|
||||
8.2951,5.7442
|
||||
10.236,7.7754
|
||||
5.4994,1.0173
|
||||
20.341,20.992
|
||||
10.136,6.6799
|
||||
7.3345,4.0259
|
||||
6.0062,1.2784
|
||||
7.2259,3.3411
|
||||
5.0269,-2.6807
|
||||
6.5479,0.29678
|
||||
7.5386,3.8845
|
||||
5.0365,5.7014
|
||||
10.274,6.7526
|
||||
5.1077,2.0576
|
||||
5.7292,0.47953
|
||||
5.1884,0.20421
|
||||
6.3557,0.67861
|
||||
9.7687,7.5435
|
||||
6.5159,5.3436
|
||||
8.5172,4.2415
|
||||
9.1802,6.7981
|
||||
6.002,0.92695
|
||||
5.5204,0.152
|
||||
5.0594,2.8214
|
||||
5.7077,1.8451
|
||||
7.6366,4.2959
|
||||
5.8707,7.2029
|
||||
5.3054,1.9869
|
||||
8.2934,0.14454
|
||||
13.394,9.0551
|
||||
5.4369,0.61705
|
||||
47
5/data science/r/6/mlclass-ex1/ex1data2.txt
Normal file
47
5/data science/r/6/mlclass-ex1/ex1data2.txt
Normal file
@ -0,0 +1,47 @@
|
||||
2104,3,399900
|
||||
1600,3,329900
|
||||
2400,3,369000
|
||||
1416,2,232000
|
||||
3000,4,539900
|
||||
1985,4,299900
|
||||
1534,3,314900
|
||||
1427,3,198999
|
||||
1380,3,212000
|
||||
1494,3,242500
|
||||
1940,4,239999
|
||||
2000,3,347000
|
||||
1890,3,329999
|
||||
4478,5,699900
|
||||
1268,3,259900
|
||||
2300,4,449900
|
||||
1320,2,299900
|
||||
1236,3,199900
|
||||
2609,4,499998
|
||||
3031,4,599000
|
||||
1767,3,252900
|
||||
1888,2,255000
|
||||
1604,3,242900
|
||||
1962,4,259900
|
||||
3890,3,573900
|
||||
1100,3,249900
|
||||
1458,3,464500
|
||||
2526,3,469000
|
||||
2200,3,475000
|
||||
2637,3,299900
|
||||
1839,2,349900
|
||||
1000,1,169900
|
||||
2040,4,314900
|
||||
3137,3,579900
|
||||
1811,4,285900
|
||||
1437,3,249900
|
||||
1239,3,229900
|
||||
2132,4,345000
|
||||
4215,4,549000
|
||||
2162,4,287000
|
||||
1664,2,368500
|
||||
2238,3,329900
|
||||
2567,4,314000
|
||||
1200,3,299000
|
||||
852,2,179900
|
||||
1852,4,299900
|
||||
1203,3,239500
|
||||
35
5/data science/r/6/mlclass-ex1/featureNormalize.m
Normal file
35
5/data science/r/6/mlclass-ex1/featureNormalize.m
Normal file
@ -0,0 +1,35 @@
|
||||
function [X_norm, mu, sigma] = featureNormalize(X)
%FEATURENORMALIZE Normalizes the features in X
%   [X_NORM, MU, SIGMA] = FEATURENORMALIZE(X) returns a normalized copy of
%   X in which every column (feature) has mean 0 and standard deviation 1,
%   together with the per-column means MU and standard deviations SIGMA
%   that were used, so the same transform can later be applied to new
%   examples.
%
%   X : matrix where each column is a feature and each row is an example.

% Per-column statistics: mean/std operate column-wise on a matrix,
% yielding 1 x size(X, 2) row vectors.
mu = mean(X);
sigma = std(X);

% Center and scale every column; the row vectors broadcast over the rows.
X_norm = (X - mu) ./ sigma;

% ============================================================

end
|
||||
34
5/data science/r/6/mlclass-ex1/gradientDescent.m
Normal file
34
5/data science/r/6/mlclass-ex1/gradientDescent.m
Normal file
@ -0,0 +1,34 @@
|
||||
function [theta, J_history] = gradientDescent(X, y, theta, alpha, num_iters)
%GRADIENTDESCENT Performs gradient descent to learn theta
%   [THETA, J_HISTORY] = GRADIENTDESCENT(X, y, theta, alpha, num_iters)
%   runs num_iters batch gradient-descent steps with learning rate alpha
%   and returns the learned parameters plus the cost after every step.
%
%   X : (m x 2) design matrix, y : (m x 1) targets, theta : (2 x 1).

m = length(y);                    % number of training examples
J_history = zeros(num_iters, 1);  % cost recorded after each iteration

for iter = 1:num_iters
    % Prediction error for every example.
    residual = X * theta - y;

    % Vectorized gradient of the squared-error cost, and a simultaneous
    % update of all parameters.
    theta = theta - (alpha / m) * (X' * residual);

    % Save the cost J in every iteration (with the updated parameters).
    J_history(iter) = computeCost(X, y, theta);
end

end
|
||||
31
5/data science/r/6/mlclass-ex1/gradientDescentMulti.m
Normal file
31
5/data science/r/6/mlclass-ex1/gradientDescentMulti.m
Normal file
@ -0,0 +1,31 @@
|
||||
function [theta, J_history] = gradientDescentMulti(X, y, theta, alpha, num_iters)
%GRADIENTDESCENTMULTI Performs gradient descent to learn theta
%   [THETA, J_HISTORY] = GRADIENTDESCENTMULTI(X, y, theta, alpha, num_iters)
%   runs num_iters batch gradient-descent steps with learning rate alpha
%   for multivariate linear regression, returning the learned parameters
%   plus the cost after every step.

m = length(y);                    % number of training examples
J_history = zeros(num_iters, 1);  % cost recorded after each iteration

for iter = 1:num_iters
    % Prediction error for every example.
    residual = X * theta - y;

    % Vectorized gradient of the squared-error cost, and a simultaneous
    % update of all parameters.
    theta = theta - (alpha / m) * (X' * residual);

    % Save the cost J in every iteration (with the updated parameters).
    J_history(iter) = computeCostMulti(X, y, theta);
end

end
|
||||
23
5/data science/r/6/mlclass-ex1/normalEqn.m
Normal file
23
5/data science/r/6/mlclass-ex1/normalEqn.m
Normal file
@ -0,0 +1,23 @@
|
||||
function [theta] = normalEqn(X, y)
%NORMALEQN Computes the closed-form solution to linear regression
%   THETA = NORMALEQN(X, y) computes the closed-form solution to linear
%   regression using the normal equations:
%
%       theta = pinv(X' * X) * X' * y
%
%   X     : (m x n) design matrix (intercept column of ones included)
%   y     : (m x 1) target vector
%   theta : (n x 1) least-squares parameter vector

% Use the pseudo-inverse rather than inv(): it is numerically more robust
% and still returns a (minimum-norm) solution when X'*X is singular, e.g.
% with linearly dependent features. Also terminate with ';' so theta is
% not echoed to the console on every call.
theta = pinv(X' * X) * (X' * y);

% ============================================================

end
|
||||
25
5/data science/r/6/mlclass-ex1/plotData.m
Normal file
25
5/data science/r/6/mlclass-ex1/plotData.m
Normal file
@ -0,0 +1,25 @@
|
||||
function plotData(x, y)
%PLOTDATA Plots the data points x and y into a new figure
%   PLOTDATA(x, y) plots the data points and gives the figure axes labels
%   of population and profit.
%
%   x : city population values (in 10,000s), one entry per example
%   y : profit values (in $10,000s), same length as x
%
%   Opens a new figure window and marks every (x, y) pair with a red cross.

figure; % open a new figure window

% 'rx' draws red crosses; marker size 10 makes them easier to see.
plot(x, y, 'rx', 'markersize', 10)
ylabel('profit in 10000')
xlabel('population in 10000')

% ============================================================

end
|
||||
336
5/data science/r/6/mlclass-ex1/submit.m
Normal file
336
5/data science/r/6/mlclass-ex1/submit.m
Normal file
@ -0,0 +1,336 @@
|
||||
function submit(part)
%SUBMIT Submit your code and output to the ml-class servers
%   SUBMIT() prompts for which part to submit, then connects to the
%   ml-class server and submits the solution.
%   SUBMIT(PART) submits the given numeric part id directly; the value
%   numel(validParts()) + 1 means "submit all parts".

  fprintf('==\n== [ml-class] Submitting Solutions | Programming Exercise %s\n==\n', ...
          homework_id());
  if ~exist('part', 'var') || isempty(part)
    partId = promptPart();
  else
    % BUGFIX: the explicit argument was previously ignored, leaving partId
    % undefined (and the function erroring) whenever PART was passed in.
    partId = part;
  end

  % Check valid partId
  partNames = validParts();
  if ~isValidPartId(partId)
    fprintf('!! Invalid homework part selected.\n');
    fprintf('!! Expected an integer from 1 to %d.\n', numel(partNames) + 1);
    fprintf('!! Submission Cancelled\n');
    return
  end

  [login password] = loginPrompt();
  if isempty(login)
    fprintf('!! Submission Cancelled\n');
    return
  end

  fprintf('\n== Connecting to ml-class ... ');
  if exist('OCTAVE_VERSION')
    fflush(stdout);
  end

  % Setup submit list: either every part, or just the selected one.
  if partId == numel(partNames) + 1
    submitParts = 1:numel(partNames);
  else
    submitParts = [partId];
  end

  for s = 1:numel(submitParts)
    % Submit this part
    partId = submitParts(s);

    % Get Challenge
    [login, ch, signature] = getChallenge(login);
    if isempty(login) || isempty(ch) || isempty(signature)
      % Some error occured, error string in first return element.
      fprintf('\n!! Error: %s\n\n', login);
      return
    end

    % Attempt Submission with Challenge
    ch_resp = challengeResponse(login, password, ch);
    [result, str] = submitSolution(login, ch_resp, partId, output(partId), ...
                                   source(partId), signature);

    fprintf('\n== [ml-class] Submitted Homework %s - Part %d - %s\n', ...
            homework_id(), partId, partNames{partId});
    fprintf('== %s\n', strtrim(str));
    if exist('OCTAVE_VERSION')
      fflush(stdout);
    end
  end

end
|
||||
|
||||
% ================== CONFIGURABLES FOR EACH HOMEWORK ==================
|
||||
|
||||
function id = homework_id()
|
||||
id = '1';
|
||||
end
|
||||
|
||||
function [partNames] = validParts()
|
||||
partNames = { 'Warm up exercise ', ...
|
||||
'Computing Cost (for one variable)', ...
|
||||
'Gradient Descent (for one variable)', ...
|
||||
'Feature Normalization', ...
|
||||
'Computing Cost (for multiple variables)', ...
|
||||
'Gradient Descent (for multiple variables)', ...
|
||||
'Normal Equations'};
|
||||
end
|
||||
|
||||
function srcs = sources()
|
||||
% Separated by part
|
||||
srcs = { { 'warmUpExercise.m' }, ...
|
||||
{ 'computeCost.m' }, ...
|
||||
{ 'gradientDescent.m' }, ...
|
||||
{ 'featureNormalize.m' }, ...
|
||||
{ 'computeCostMulti.m' }, ...
|
||||
{ 'gradientDescentMulti.m' }, ...
|
||||
{ 'normalEqn.m' }, ...
|
||||
};
|
||||
end
|
||||
|
||||
function out = output(partId)
|
||||
% Random Test Cases
|
||||
X1 = [ones(20,1) (exp(1) + exp(2) * (0.1:0.1:2))'];
|
||||
Y1 = X1(:,2) + sin(X1(:,1)) + cos(X1(:,2));
|
||||
X2 = [X1 X1(:,2).^0.5 X1(:,2).^0.25];
|
||||
Y2 = Y1.^0.5 + Y1;
|
||||
if partId == 1
|
||||
out = sprintf('%0.5f ', warmUpExercise());
|
||||
elseif partId == 2
|
||||
out = sprintf('%0.5f ', computeCost(X1, Y1, [0.5 -0.5]'));
|
||||
elseif partId == 3
|
||||
out = sprintf('%0.5f ', gradientDescent(X1, Y1, [0.5 -0.5]', 0.01, 10));
|
||||
elseif partId == 4
|
||||
out = sprintf('%0.5f ', featureNormalize(X2(:,2:4)));
|
||||
elseif partId == 5
|
||||
out = sprintf('%0.5f ', computeCostMulti(X2, Y2, [0.1 0.2 0.3 0.4]'));
|
||||
elseif partId == 6
|
||||
out = sprintf('%0.5f ', gradientDescentMulti(X2, Y2, [-0.1 -0.2 -0.3 -0.4]', 0.01, 10));
|
||||
elseif partId == 7
|
||||
out = sprintf('%0.5f ', normalEqn(X2, Y2));
|
||||
end
|
||||
end
|
||||
|
||||
function url = challenge_url()
|
||||
url = 'http://www.ml-class.org/course/homework/challenge';
|
||||
end
|
||||
|
||||
function url = submit_url()
|
||||
url = 'http://www.ml-class.org/course/homework/submit';
|
||||
end
|
||||
|
||||
% ========================= CHALLENGE HELPERS =========================
|
||||
|
||||
function src = source(partId)
|
||||
src = '';
|
||||
src_files = sources();
|
||||
if partId <= numel(src_files)
|
||||
flist = src_files{partId};
|
||||
for i = 1:numel(flist)
|
||||
fid = fopen(flist{i});
|
||||
while ~feof(fid)
|
||||
line = fgets(fid);
|
||||
src = [src line];
|
||||
end
|
||||
src = [src '||||||||'];
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
function ret = isValidPartId(partId)
%ISVALIDPARTID True when partId selects a real part or the "all" option.
%   Valid ids are the integers 1 .. numel(validParts()) + 1, where the
%   extra final id means "submit every part".
  partNames = validParts();
  ret = (~isempty(partId)) && (partId >= 1) && (partId <= numel(partNames) + 1);
end
|
||||
|
||||
function partId = promptPart()
|
||||
fprintf('== Select which part(s) to submit:\n', ...
|
||||
homework_id());
|
||||
partNames = validParts();
|
||||
srcFiles = sources();
|
||||
for i = 1:numel(partNames)
|
||||
fprintf('== %d) %s [', i, partNames{i});
|
||||
fprintf(' %s ', srcFiles{i}{:});
|
||||
fprintf(']\n');
|
||||
end
|
||||
fprintf('== %d) All of the above \n==\nEnter your choice [1-%d]: ', ...
|
||||
numel(partNames) + 1, numel(partNames) + 1);
|
||||
selPart = input('', 's');
|
||||
partId = str2num(selPart);
|
||||
if ~isValidPartId(partId)
|
||||
partId = -1;
|
||||
end
|
||||
end
|
||||
|
||||
function [email,ch,signature] = getChallenge(email)
|
||||
str = urlread(challenge_url(), 'post', {'email_address', email});
|
||||
|
||||
str = strtrim(str);
|
||||
[email, str] = strtok (str, '|');
|
||||
[ch, str] = strtok (str, '|');
|
||||
[signature, str] = strtok (str, '|');
|
||||
end
|
||||
|
||||
|
||||
function [result, str] = submitSolution(email, ch_resp, part, output, ...
|
||||
source, signature)
|
||||
|
||||
params = {'homework', homework_id(), ...
|
||||
'part', num2str(part), ...
|
||||
'email', email, ...
|
||||
'output', output, ...
|
||||
'source', source, ...
|
||||
'challenge_response', ch_resp, ...
|
||||
'signature', signature};
|
||||
|
||||
str = urlread(submit_url(), 'post', params);
|
||||
|
||||
% Parse str to read for success / failure
|
||||
result = 0;
|
||||
|
||||
end
|
||||
|
||||
% =========================== LOGIN HELPERS ===========================
|
||||
|
||||
function [login password] = loginPrompt()
%LOGINPROMPT Ask for login email and password on stdin.
%   Returns both values empty when either entry was left blank, which the
%   caller treats as "submission cancelled".
  % Prompt for password
  [login password] = basicPrompt();

  if isempty(login) || isempty(password)
    login = []; password = [];
  end
end
|
||||
|
||||
|
||||
function [login password] = basicPrompt()
%BASICPROMPT Read the login email and password as plain strings.
%   NOTE(review): the password is echoed to the terminal; input() has no
%   hidden-entry mode in plain Octave/MATLAB.
  login = input('Login (Email address): ', 's');
  password = input('Password: ', 's');
end
|
||||
|
||||
|
||||
function [str] = challengeResponse(email, passwd, challenge)
%CHALLENGERESPONSE Build the hashed response to a server login challenge.
%   Hashes salt+credentials, hashes that digest together with the server
%   challenge, then returns 16 characters sampled from the result.
%   NOTE(review): randperm makes the selected character positions
%   non-deterministic; presumably the server accepts any ordered 16-char
%   subsequence of the digest -- verify against the server protocol.
  salt = ')~/|]QMB3[!W`?OVt7qC"@+}';
  str = sha1([challenge sha1([salt email passwd])]);
  sel = randperm(numel(str));
  sel = sort(sel(1:16));
  str = str(sel);
end
|
||||
|
||||
|
||||
% =============================== SHA-1 ================================
|
||||
|
||||
function hash = sha1(str)
|
||||
|
||||
% Initialize variables
|
||||
h0 = uint32(1732584193);
|
||||
h1 = uint32(4023233417);
|
||||
h2 = uint32(2562383102);
|
||||
h3 = uint32(271733878);
|
||||
h4 = uint32(3285377520);
|
||||
|
||||
% Convert to word array
|
||||
strlen = numel(str);
|
||||
|
||||
% Break string into chars and append the bit 1 to the message
|
||||
mC = [double(str) 128];
|
||||
mC = [mC zeros(1, 4-mod(numel(mC), 4), 'uint8')];
|
||||
|
||||
numB = strlen * 8;
|
||||
if exist('idivide')
|
||||
numC = idivide(uint32(numB + 65), 512, 'ceil');
|
||||
else
|
||||
numC = ceil(double(numB + 65)/512);
|
||||
end
|
||||
numW = numC * 16;
|
||||
mW = zeros(numW, 1, 'uint32');
|
||||
|
||||
idx = 1;
|
||||
for i = 1:4:strlen + 1
|
||||
mW(idx) = bitor(bitor(bitor( ...
|
||||
bitshift(uint32(mC(i)), 24), ...
|
||||
bitshift(uint32(mC(i+1)), 16)), ...
|
||||
bitshift(uint32(mC(i+2)), 8)), ...
|
||||
uint32(mC(i+3)));
|
||||
idx = idx + 1;
|
||||
end
|
||||
|
||||
% Append length of message
|
||||
mW(numW - 1) = uint32(bitshift(uint64(numB), -32));
|
||||
mW(numW) = uint32(bitshift(bitshift(uint64(numB), 32), -32));
|
||||
|
||||
% Process the message in successive 512-bit chs
|
||||
for cId = 1 : double(numC)
|
||||
cSt = (cId - 1) * 16 + 1;
|
||||
cEnd = cId * 16;
|
||||
ch = mW(cSt : cEnd);
|
||||
|
||||
% Extend the sixteen 32-bit words into eighty 32-bit words
|
||||
for j = 17 : 80
|
||||
ch(j) = ch(j - 3);
|
||||
ch(j) = bitxor(ch(j), ch(j - 8));
|
||||
ch(j) = bitxor(ch(j), ch(j - 14));
|
||||
ch(j) = bitxor(ch(j), ch(j - 16));
|
||||
ch(j) = bitrotate(ch(j), 1);
|
||||
end
|
||||
|
||||
% Initialize hash value for this ch
|
||||
a = h0;
|
||||
b = h1;
|
||||
c = h2;
|
||||
d = h3;
|
||||
e = h4;
|
||||
|
||||
% Main loop
|
||||
for i = 1 : 80
|
||||
if(i >= 1 && i <= 20)
|
||||
f = bitor(bitand(b, c), bitand(bitcmp(b), d));
|
||||
k = uint32(1518500249);
|
||||
elseif(i >= 21 && i <= 40)
|
||||
f = bitxor(bitxor(b, c), d);
|
||||
k = uint32(1859775393);
|
||||
elseif(i >= 41 && i <= 60)
|
||||
f = bitor(bitor(bitand(b, c), bitand(b, d)), bitand(c, d));
|
||||
k = uint32(2400959708);
|
||||
elseif(i >= 61 && i <= 80)
|
||||
f = bitxor(bitxor(b, c), d);
|
||||
k = uint32(3395469782);
|
||||
end
|
||||
|
||||
t = bitrotate(a, 5);
|
||||
t = bitadd(t, f);
|
||||
t = bitadd(t, e);
|
||||
t = bitadd(t, k);
|
||||
t = bitadd(t, ch(i));
|
||||
e = d;
|
||||
d = c;
|
||||
c = bitrotate(b, 30);
|
||||
b = a;
|
||||
a = t;
|
||||
|
||||
end
|
||||
h0 = bitadd(h0, a);
|
||||
h1 = bitadd(h1, b);
|
||||
h2 = bitadd(h2, c);
|
||||
h3 = bitadd(h3, d);
|
||||
h4 = bitadd(h4, e);
|
||||
|
||||
end
|
||||
|
||||
hash = reshape(dec2hex(double([h0 h1 h2 h3 h4]), 8)', [1 40]);
|
||||
|
||||
hash = lower(hash);
|
||||
|
||||
end
|
||||
|
||||
function ret = bitadd(iA, iB)
%BITADD 32-bit modular addition (uint32 wrap-around semantics).
%   Adds the operands in double precision (so the carry out of bit 32 is
%   representable), clears bit 33 to emulate mod-2^32 overflow, and casts
%   the result back to uint32.
  ret = double(iA) + double(iB);
  ret = bitset(ret, 33, 0);
  ret = uint32(ret);
end
|
||||
|
||||
function ret = bitrotate(iA, places)
%BITROTATE Rotate a 32-bit word left by PLACES bits.
%   The top PLACES bits (extracted with the negative shift places - 32)
%   are OR-ed back in at the bottom of the left-shifted word.
%   NOTE(review): assumes 0 < places < 32 and that bitshift on uint32
%   discards bits shifted beyond bit 32 -- confirm for the target
%   interpreter version.
  t = bitshift(iA, places - 32);
  ret = bitshift(iA, places);
  ret = bitor(ret, t);
end
|
||||
15
5/data science/r/6/mlclass-ex1/warmUpExercise.m
Normal file
15
5/data science/r/6/mlclass-ex1/warmUpExercise.m
Normal file
@ -0,0 +1,15 @@
|
||||
function A = warmUpExercise()
%WARMUPEXERCISE Example warm-up function.
%   A = WARMUPEXERCISE() returns the 5x5 identity matrix.
%
%   In Octave/MATLAB, return values are declared at the top of the file
%   (here A) and assigned in the body.

% eye() is the idiomatic, vectorized way to build an identity matrix;
% it replaces the original element-by-element loop.
A = eye(5);

% ===========================================

end
|
||||
91
5/data science/r/7.rmd
Normal file
91
5/data science/r/7.rmd
Normal file
@ -0,0 +1,91 @@
|
||||
---
title: "Lab7: Logistic regression"
author: "Vladislav Litvinov <vlad@sek1ro>"
output:
  pdf_document:
    # BUGFIX: toc_float is an html_document-only option and makes
    # pdf_document fail with an "unused argument" error; use toc instead.
    toc: TRUE
---
# Logit

```{r}
# Draw n probabilities and compare the distribution of the odds
# (heavily right-skewed) with the log-odds / logit (roughly symmetric).
# Vectorized: runif(n) consumes the RNG stream exactly as n calls to
# runif(1) would, so the sampled values are unchanged.
n <- 500
breaks <- 100
p <- runif(n)
odds <- p / (1 - p)
logit <- log(odds)
hist(odds, breaks = breaks)
hist(logit, breaks = breaks)

# Small 4x2 contingency table for an exact independence test.
data <- matrix(nrow = 4, ncol = 2, byrow = TRUE,
               data = c(2, 3,
                        0, 3,
                        0, 2,
                        1, 2))

fisher.test(data)
```
# Data preparation
```{r}
setwd('/home/sek1ro/git/public/lab/ds/25-1/r')
# BUGFIX: the data frame was previously assigned to `surve`, so the code
# below only worked if a stale `survey` object already existed in the
# interactive session.
survey <- read.csv('survey.csv')

head(survey)

# Dummy-code the Price column (the remaining price level is the baseline).
survey$price20 <- ifelse(survey$Price == 20, 1, 0)
survey$price30 <- ifelse(survey$Price == 30, 1, 0)
head(survey)
```
# Model training

Residuals are the differences between what we observe and what our model predicts.
Residuals greater than the absolute value of 3 are in the tails of a standard normal distribution and usually indicate strain in the model.

https://stats.stackexchange.com/questions/48178/how-to-interpret-the-intercept-term-in-a-glm
https://library.virginia.edu/data/articles/understanding-deviance-residuals
```{r}
# Logistic regression of purchase decision on income, age and price dummies.
model <- glm(
  MYDEPV ~ Income + Age + price20 + price30,
  binomial(link = "logit"),
  survey
)
summary(model)
quantile(residuals(model))
```
# Predictions for the model
```{r}
# predict() on the training data returns log-odds; convert to odds and
# then to probabilities.
survey$odds_ratio <- exp(predict(model))
survey$prediction <- survey$odds_ratio / (1 + survey$odds_ratio)
head(survey)

# Observed positives vs. the model's expected count of positives.
sum(survey$MYDEPV)
sum(survey$prediction)

# Probability of purchase for one new respondent at price 20.
new_person <- data.frame(
  Income = 58,
  Age = 25,
  price20 = 1,
  price30 = 0
)

prob <- predict(model, new_person, type = "response")
prob
```

```{r}
library(ggplot2)
# Fitted probabilities ranked from lowest to highest, colored by the
# observed outcome -- a quick visual check of model separation.
predicted <- data.frame(
  prob = model$fitted.values,
  MYDEPV = survey$MYDEPV)

predicted <- predicted[order(predicted$prob, decreasing = FALSE), ]
predicted$rank <- seq_len(nrow(predicted))

ggplot(data = predicted, aes(x = rank, y = prob)) +
  geom_point(aes(color = MYDEPV), alpha = 0.5, shape = 4, stroke = 1) +
  xlab("Index") +
  ylab("MYDEPV")
```
|
||||
BIN
5/data science/r/7/ex2.pdf
Normal file
BIN
5/data science/r/7/ex2.pdf
Normal file
Binary file not shown.
30
5/data science/r/7/mlclass-ex2/costFunction.m
Normal file
30
5/data science/r/7/mlclass-ex2/costFunction.m
Normal file
@ -0,0 +1,30 @@
|
||||
function [J, grad] = costFunction(theta, X, y)
%COSTFUNCTION Compute cost and gradient for logistic regression
%   [J, GRAD] = COSTFUNCTION(theta, X, y) computes the cross-entropy cost
%   of using THETA as the logistic-regression parameters for the data
%   (X, y), and the gradient of the cost w.r.t. the parameters.
%
%   X : (m x n) design matrix, y : (m x 1) labels in {0, 1},
%   theta : (n x 1); GRAD has the same dimensions as THETA.

% Initialize some useful values
m = length(y); % number of training examples

% Model output (predicted probability) for every example.
h = sigmoid(X * theta);

% Average negative log-likelihood over the m examples.
losses = - y .* log(h) - (1 - y) .* log(1 - h);
J = 1 / m * sum(losses, 1);

% Vectorized gradient: (1/m) * X' * (h - y).
grad = 1 / m * (X' * (h - y));

% =============================================================

end
|
||||
27
5/data science/r/7/mlclass-ex2/costFunctionReg.m
Normal file
27
5/data science/r/7/mlclass-ex2/costFunctionReg.m
Normal file
@ -0,0 +1,27 @@
|
||||
function [J, grad] = costFunctionReg(theta, X, y, lambda)
%COSTFUNCTIONREG Compute cost and gradient for regularized logistic regression
%   [J, GRAD] = COSTFUNCTIONREG(theta, X, y, lambda) computes the cost of
%   using THETA as the parameter for regularized logistic regression and
%   the gradient of the cost w.r.t. the parameters.
%
%   By convention the intercept parameter theta(1) is NOT regularized: it
%   is excluded from the penalty term and its gradient component stays the
%   unregularized one.

% Initialize some useful values
m = length(y); % number of training examples

% Unregularized cost and gradient from the plain logistic cost function.
[J_, grad_] = costFunction(theta, X, y);

% BUGFIX: the penalty previously used theta' * theta, which wrongly
% included the intercept theta(1) in the regularization term (the
% gradient below already excluded it).
reg = theta(2:end)' * theta(2:end);
J = J_ + lambda / (2 * m) * reg;

% Regularize every gradient component except the intercept's.
grad = grad_ + theta * lambda / m;
grad(1) = grad_(1);

% =============================================================

end
|
||||
135
5/data science/r/7/mlclass-ex2/ex2.m
Normal file
135
5/data science/r/7/mlclass-ex2/ex2.m
Normal file
@ -0,0 +1,135 @@
|
||||
%% Machine Learning Online Class - Exercise 2: Logistic Regression
|
||||
%
|
||||
% Instructions
|
||||
% ------------
|
||||
%
|
||||
% This file contains code that helps you get started on the logistic
|
||||
% regression exercise. You will need to complete the following functions
|
||||
% in this exericse:
|
||||
%
|
||||
% sigmoid.m
|
||||
% costFunction.m
|
||||
% predict.m
|
||||
% costFunctionReg.m
|
||||
%
|
||||
% For this exercise, you will not need to change any code in this file,
|
||||
% or any other files other than those mentioned above.
|
||||
%
|
||||
|
||||
%% Initialization
|
||||
clear ; close all; clc
|
||||
|
||||
%% Load Data
|
||||
% The first two columns contains the exam scores and the third column
|
||||
% contains the label.
|
||||
|
||||
data = load('ex2data1.txt');
|
||||
X = data(:, [1, 2]); y = data(:, 3);
|
||||
|
||||
%% ==================== Part 1: Plotting ====================
|
||||
% We start the exercise by first plotting the data to understand the
|
||||
% the problem we are working with.
|
||||
|
||||
fprintf(['Plotting data with + indicating (y = 1) examples and o ' ...
|
||||
'indicating (y = 0) examples.\n']);
|
||||
|
||||
plotData(X, y);
|
||||
|
||||
% Put some labels
|
||||
hold on;
|
||||
% Labels and Legend
|
||||
xlabel('Exam 1 score')
|
||||
ylabel('Exam 2 score')
|
||||
|
||||
% Specified in plot order
|
||||
legend('Admitted', 'Not admitted')
|
||||
hold off;
|
||||
|
||||
fprintf('\nProgram paused. Press enter to continue.\n');
|
||||
pause;
|
||||
|
||||
|
||||
%% ============ Part 2: Compute Cost and Gradient ============
|
||||
% In this part of the exercise, you will implement the cost and gradient
|
||||
% for logistic regression. You neeed to complete the code in
|
||||
% costFunction.m
|
||||
|
||||
% Setup the data matrix appropriately, and add ones for the intercept term
|
||||
[m, n] = size(X);
|
||||
|
||||
% Add intercept term to x and X_test
|
||||
X = [ones(m, 1) X];
|
||||
|
||||
% Initialize fitting parameters
|
||||
initial_theta = zeros(n + 1, 1);
|
||||
|
||||
% Compute and display initial cost and gradient
|
||||
[cost, grad] = costFunction(initial_theta, X, y);
|
||||
|
||||
fprintf('Cost at initial theta (zeros): %f\n', cost);
|
||||
fprintf('Gradient at initial theta (zeros): \n');
|
||||
fprintf(' %f \n', grad);
|
||||
|
||||
fprintf('\nProgram paused. Press enter to continue.\n');
|
||||
pause;
|
||||
|
||||
|
||||
%% ============= Part 3: Optimizing using fminunc =============
|
||||
% In this exercise, you will use a built-in function (fminunc) to find the
|
||||
% optimal parameters theta.
|
||||
|
||||
% Set options for fminunc
|
||||
options = optimset('GradObj', 'on', 'MaxIter', 400);
|
||||
|
||||
% Run fminunc to obtain the optimal theta
|
||||
% This function will return theta and the cost
|
||||
[theta, cost] = ...
|
||||
fminunc(@(t)(costFunction(t, X, y)), initial_theta, options);
|
||||
|
||||
% Print theta to screen
|
||||
fprintf('Cost at theta found by fminunc: %f\n', cost);
|
||||
fprintf('theta: \n');
|
||||
fprintf(' %f \n', theta);
|
||||
|
||||
% Plot Boundary
|
||||
plotDecisionBoundary(theta, X, y);
|
||||
|
||||
% Put some labels
|
||||
hold on;
|
||||
% Labels and Legend
|
||||
xlabel('Exam 1 score')
|
||||
ylabel('Exam 2 score')
|
||||
|
||||
% Specified in plot order
|
||||
legend('Admitted', 'Not admitted')
|
||||
hold off;
|
||||
|
||||
fprintf('\nProgram paused. Press enter to continue.\n');
|
||||
pause;
|
||||
|
||||
%% ============== Part 4: Predict and Accuracies ==============
|
||||
% After learning the parameters, you'll like to use it to predict the outcomes
|
||||
% on unseen data. In this part, you will use the logistic regression model
|
||||
% to predict the probability that a student with score 20 on exam 1 and
|
||||
% score 80 on exam 2 will be admitted.
|
||||
%
|
||||
% Furthermore, you will compute the training and test set accuracies of
|
||||
% our model.
|
||||
%
|
||||
% Your task is to complete the code in predict.m
|
||||
|
||||
% Predict probability for a student with score 45 on exam 1
|
||||
% and score 85 on exam 2
|
||||
|
||||
prob = sigmoid([1 45 85] * theta);
|
||||
fprintf(['For a student with scores 45 and 85, we predict an admission ' ...
|
||||
'probability of %f\n\n'], prob);
|
||||
|
||||
% Compute accuracy on our training set
|
||||
p = predict(theta, X);
|
||||
|
||||
fprintf('Train Accuracy: %f\n', mean(double(p == y)) * 100);
|
||||
|
||||
fprintf('\nProgram paused. Press enter to continue.\n');
|
||||
pause;
|
||||
|
||||
126
5/data science/r/7/mlclass-ex2/ex2_reg.m
Normal file
126
5/data science/r/7/mlclass-ex2/ex2_reg.m
Normal file
@ -0,0 +1,126 @@
|
||||
%% Machine Learning Online Class - Exercise 2: Logistic Regression
|
||||
%
|
||||
% Instructions
|
||||
% ------------
|
||||
%
|
||||
% This file contains code that helps you get started on the second part
|
||||
% of the exercise which covers regularization with logistic regression.
|
||||
%
|
||||
% You will need to complete the following functions in this exericse:
|
||||
%
|
||||
% sigmoid.m
|
||||
% costFunction.m
|
||||
% predict.m
|
||||
% costFunctionReg.m
|
||||
%
|
||||
% For this exercise, you will not need to change any code in this file,
|
||||
% or any other files other than those mentioned above.
|
||||
%
|
||||
|
||||
%% Initialization
|
||||
clear ; close all; clc
|
||||
|
||||
%% Load Data
|
||||
% The first two columns contains the exam scores and the third column
|
||||
% contains the label.
|
||||
|
||||
data = load('ex2data2.txt');
|
||||
X = data(:, [1, 2]); y = data(:, 3);
|
||||
|
||||
plotData(X, y);
|
||||
|
||||
% Put some labels
|
||||
hold on;
|
||||
|
||||
% Labels and Legend
|
||||
xlabel('Microchip Test 1')
|
||||
ylabel('Microchip Test 2')
|
||||
|
||||
% Specified in plot order
|
||||
legend('y = 1', 'y = 0')
|
||||
hold off;
|
||||
|
||||
|
||||
%% =========== Part 1: Regularized Logistic Regression ============
|
||||
% In this part, you are given a dataset with data points that are not
|
||||
% linearly separable. However, you would still like to use logistic
|
||||
% regression to classify the data points.
|
||||
%
|
||||
% To do so, you introduce more features to use -- in particular, you add
|
||||
% polynomial features to our data matrix (similar to polynomial
|
||||
% regression).
|
||||
%
|
||||
|
||||
% Add Polynomial Features
|
||||
|
||||
% Note that mapFeature also adds a column of ones for us, so the intercept
|
||||
% term is handled
|
||||
X = mapFeature(X(:,1), X(:,2));
|
||||
|
||||
% Initialize fitting parameters
|
||||
initial_theta = zeros(size(X, 2), 1);
|
||||
|
||||
% Set regularization parameter lambda to 1
|
||||
lambda = 1;
|
||||
|
||||
% Compute and display initial cost and gradient for regularized logistic
|
||||
% regression
|
||||
[cost, grad] = costFunctionReg(initial_theta, X, y, lambda);
|
||||
|
||||
fprintf('Cost at initial theta (zeros): %f\n', cost);
|
||||
|
||||
fprintf('\nProgram paused. Press enter to continue.\n');
|
||||
pause;
|
||||
|
||||
%% ============= Part 2: Regularization and Accuracies =============
|
||||
% Optional Exercise:
|
||||
% In this part, you will get to try different values of lambda and
|
||||
% see how regularization affects the decision coundart
|
||||
%
|
||||
% Try the following values of lambda (0, 1, 10, 100).
|
||||
%
|
||||
% How does the decision boundary change when you vary lambda? How does
|
||||
% the training set accuracy vary?
|
||||
%
|
||||
|
||||
% Initialize fitting parameters
|
||||
initial_theta = zeros(size(X, 2), 1);
|
||||
|
||||
% Set regularization parameter lambda to 1 (you should vary this)
|
||||
lambda = 1;
|
||||
|
||||
% Set Options
|
||||
|
||||
options = optimset('GradObj', 'on', 'MaxIter', 400);
|
||||
|
||||
%Function Minimum Unconstrained
|
||||
[theta, J, exit_flag] = ...
|
||||
fminunc(@(t)(costFunction(t, X, y)), initial_theta, options);
|
||||
|
||||
plotDecisionBoundary(theta, X, y);
|
||||
p = predict(theta, X);
|
||||
|
||||
fprintf('Train Accuracy: %f\n', mean(double(p == y)) * 100);
|
||||
|
||||
% Optimize=
|
||||
[theta, J, exit_flag] = ...
|
||||
fminunc(@(t)(costFunctionReg(t, X, y, lambda)), initial_theta, options);
|
||||
|
||||
% Plot Boundary
|
||||
plotDecisionBoundary(theta, X, y);
|
||||
hold on;
|
||||
title(sprintf('lambda = %g', lambda))
|
||||
|
||||
% Labels and Legend
|
||||
xlabel('Microchip Test 1')
|
||||
ylabel('Microchip Test 2')
|
||||
|
||||
legend('y = 1', 'y = 0', 'Decision boundary')
|
||||
hold off;
|
||||
|
||||
% Compute accuracy on our training set
|
||||
p = predict(theta, X);
|
||||
|
||||
fprintf('Train Accuracy Reg: %f\n', mean(double(p == y)) * 100);
|
||||
|
||||
|
||||
100
5/data science/r/7/mlclass-ex2/ex2data1.txt
Normal file
100
5/data science/r/7/mlclass-ex2/ex2data1.txt
Normal file
@ -0,0 +1,100 @@
|
||||
34.62365962451697,78.0246928153624,0
|
||||
30.28671076822607,43.89499752400101,0
|
||||
35.84740876993872,72.90219802708364,0
|
||||
60.18259938620976,86.30855209546826,1
|
||||
79.0327360507101,75.3443764369103,1
|
||||
45.08327747668339,56.3163717815305,0
|
||||
61.10666453684766,96.51142588489624,1
|
||||
75.02474556738889,46.55401354116538,1
|
||||
76.09878670226257,87.42056971926803,1
|
||||
84.43281996120035,43.53339331072109,1
|
||||
95.86155507093572,38.22527805795094,0
|
||||
75.01365838958247,30.60326323428011,0
|
||||
82.30705337399482,76.48196330235604,1
|
||||
69.36458875970939,97.71869196188608,1
|
||||
39.53833914367223,76.03681085115882,0
|
||||
53.9710521485623,89.20735013750205,1
|
||||
69.07014406283025,52.74046973016765,1
|
||||
67.94685547711617,46.67857410673128,0
|
||||
70.66150955499435,92.92713789364831,1
|
||||
76.97878372747498,47.57596364975532,1
|
||||
67.37202754570876,42.83843832029179,0
|
||||
89.67677575072079,65.79936592745237,1
|
||||
50.534788289883,48.85581152764205,0
|
||||
34.21206097786789,44.20952859866288,0
|
||||
77.9240914545704,68.9723599933059,1
|
||||
62.27101367004632,69.95445795447587,1
|
||||
80.1901807509566,44.82162893218353,1
|
||||
93.114388797442,38.80067033713209,0
|
||||
61.83020602312595,50.25610789244621,0
|
||||
38.78580379679423,64.99568095539578,0
|
||||
61.379289447425,72.80788731317097,1
|
||||
85.40451939411645,57.05198397627122,1
|
||||
52.10797973193984,63.12762376881715,0
|
||||
52.04540476831827,69.43286012045222,1
|
||||
40.23689373545111,71.16774802184875,0
|
||||
54.63510555424817,52.21388588061123,0
|
||||
33.91550010906887,98.86943574220611,0
|
||||
64.17698887494485,80.90806058670817,1
|
||||
74.78925295941542,41.57341522824434,0
|
||||
34.1836400264419,75.2377203360134,0
|
||||
83.90239366249155,56.30804621605327,1
|
||||
51.54772026906181,46.85629026349976,0
|
||||
94.44336776917852,65.56892160559052,1
|
||||
82.36875375713919,40.61825515970618,0
|
||||
51.04775177128865,45.82270145776001,0
|
||||
62.22267576120188,52.06099194836679,0
|
||||
77.19303492601364,70.45820000180959,1
|
||||
97.77159928000232,86.7278223300282,1
|
||||
62.07306379667647,96.76882412413983,1
|
||||
91.56497449807442,88.69629254546599,1
|
||||
79.94481794066932,74.16311935043758,1
|
||||
99.2725269292572,60.99903099844988,1
|
||||
90.54671411399852,43.39060180650027,1
|
||||
34.52451385320009,60.39634245837173,0
|
||||
50.2864961189907,49.80453881323059,0
|
||||
49.58667721632031,59.80895099453265,0
|
||||
97.64563396007767,68.86157272420604,1
|
||||
32.57720016809309,95.59854761387875,0
|
||||
74.24869136721598,69.82457122657193,1
|
||||
71.79646205863379,78.45356224515052,1
|
||||
75.3956114656803,85.75993667331619,1
|
||||
35.28611281526193,47.02051394723416,0
|
||||
56.25381749711624,39.26147251058019,0
|
||||
30.05882244669796,49.59297386723685,0
|
||||
44.66826172480893,66.45008614558913,0
|
||||
66.56089447242954,41.09209807936973,0
|
||||
40.45755098375164,97.53518548909936,1
|
||||
49.07256321908844,51.88321182073966,0
|
||||
80.27957401466998,92.11606081344084,1
|
||||
66.74671856944039,60.99139402740988,1
|
||||
32.72283304060323,43.30717306430063,0
|
||||
64.0393204150601,78.03168802018232,1
|
||||
72.34649422579923,96.22759296761404,1
|
||||
60.45788573918959,73.09499809758037,1
|
||||
58.84095621726802,75.85844831279042,1
|
||||
99.82785779692128,72.36925193383885,1
|
||||
47.26426910848174,88.47586499559782,1
|
||||
50.45815980285988,75.80985952982456,1
|
||||
60.45555629271532,42.50840943572217,0
|
||||
82.22666157785568,42.71987853716458,0
|
||||
88.9138964166533,69.80378889835472,1
|
||||
94.83450672430196,45.69430680250754,1
|
||||
67.31925746917527,66.58935317747915,1
|
||||
57.23870631569862,59.51428198012956,1
|
||||
80.36675600171273,90.96014789746954,1
|
||||
68.46852178591112,85.59430710452014,1
|
||||
42.0754545384731,78.84478600148043,0
|
||||
75.47770200533905,90.42453899753964,1
|
||||
78.63542434898018,96.64742716885644,1
|
||||
52.34800398794107,60.76950525602592,0
|
||||
94.09433112516793,77.15910509073893,1
|
||||
90.44855097096364,87.50879176484702,1
|
||||
55.48216114069585,35.57070347228866,0
|
||||
74.49269241843041,84.84513684930135,1
|
||||
89.84580670720979,45.35828361091658,1
|
||||
83.48916274498238,48.38028579728175,1
|
||||
42.2617008099817,87.10385094025457,1
|
||||
99.31500880510394,68.77540947206617,1
|
||||
55.34001756003703,64.9319380069486,1
|
||||
74.77589300092767,89.52981289513276,1
|
||||
118
5/data science/r/7/mlclass-ex2/ex2data2.txt
Normal file
118
5/data science/r/7/mlclass-ex2/ex2data2.txt
Normal file
@ -0,0 +1,118 @@
|
||||
0.051267,0.69956,1
|
||||
-0.092742,0.68494,1
|
||||
-0.21371,0.69225,1
|
||||
-0.375,0.50219,1
|
||||
-0.51325,0.46564,1
|
||||
-0.52477,0.2098,1
|
||||
-0.39804,0.034357,1
|
||||
-0.30588,-0.19225,1
|
||||
0.016705,-0.40424,1
|
||||
0.13191,-0.51389,1
|
||||
0.38537,-0.56506,1
|
||||
0.52938,-0.5212,1
|
||||
0.63882,-0.24342,1
|
||||
0.73675,-0.18494,1
|
||||
0.54666,0.48757,1
|
||||
0.322,0.5826,1
|
||||
0.16647,0.53874,1
|
||||
-0.046659,0.81652,1
|
||||
-0.17339,0.69956,1
|
||||
-0.47869,0.63377,1
|
||||
-0.60541,0.59722,1
|
||||
-0.62846,0.33406,1
|
||||
-0.59389,0.005117,1
|
||||
-0.42108,-0.27266,1
|
||||
-0.11578,-0.39693,1
|
||||
0.20104,-0.60161,1
|
||||
0.46601,-0.53582,1
|
||||
0.67339,-0.53582,1
|
||||
-0.13882,0.54605,1
|
||||
-0.29435,0.77997,1
|
||||
-0.26555,0.96272,1
|
||||
-0.16187,0.8019,1
|
||||
-0.17339,0.64839,1
|
||||
-0.28283,0.47295,1
|
||||
-0.36348,0.31213,1
|
||||
-0.30012,0.027047,1
|
||||
-0.23675,-0.21418,1
|
||||
-0.06394,-0.18494,1
|
||||
0.062788,-0.16301,1
|
||||
0.22984,-0.41155,1
|
||||
0.2932,-0.2288,1
|
||||
0.48329,-0.18494,1
|
||||
0.64459,-0.14108,1
|
||||
0.46025,0.012427,1
|
||||
0.6273,0.15863,1
|
||||
0.57546,0.26827,1
|
||||
0.72523,0.44371,1
|
||||
0.22408,0.52412,1
|
||||
0.44297,0.67032,1
|
||||
0.322,0.69225,1
|
||||
0.13767,0.57529,1
|
||||
-0.0063364,0.39985,1
|
||||
-0.092742,0.55336,1
|
||||
-0.20795,0.35599,1
|
||||
-0.20795,0.17325,1
|
||||
-0.43836,0.21711,1
|
||||
-0.21947,-0.016813,1
|
||||
-0.13882,-0.27266,1
|
||||
0.18376,0.93348,0
|
||||
0.22408,0.77997,0
|
||||
0.29896,0.61915,0
|
||||
0.50634,0.75804,0
|
||||
0.61578,0.7288,0
|
||||
0.60426,0.59722,0
|
||||
0.76555,0.50219,0
|
||||
0.92684,0.3633,0
|
||||
0.82316,0.27558,0
|
||||
0.96141,0.085526,0
|
||||
0.93836,0.012427,0
|
||||
0.86348,-0.082602,0
|
||||
0.89804,-0.20687,0
|
||||
0.85196,-0.36769,0
|
||||
0.82892,-0.5212,0
|
||||
0.79435,-0.55775,0
|
||||
0.59274,-0.7405,0
|
||||
0.51786,-0.5943,0
|
||||
0.46601,-0.41886,0
|
||||
0.35081,-0.57968,0
|
||||
0.28744,-0.76974,0
|
||||
0.085829,-0.75512,0
|
||||
0.14919,-0.57968,0
|
||||
-0.13306,-0.4481,0
|
||||
-0.40956,-0.41155,0
|
||||
-0.39228,-0.25804,0
|
||||
-0.74366,-0.25804,0
|
||||
-0.69758,0.041667,0
|
||||
-0.75518,0.2902,0
|
||||
-0.69758,0.68494,0
|
||||
-0.4038,0.70687,0
|
||||
-0.38076,0.91886,0
|
||||
-0.50749,0.90424,0
|
||||
-0.54781,0.70687,0
|
||||
0.10311,0.77997,0
|
||||
0.057028,0.91886,0
|
||||
-0.10426,0.99196,0
|
||||
-0.081221,1.1089,0
|
||||
0.28744,1.087,0
|
||||
0.39689,0.82383,0
|
||||
0.63882,0.88962,0
|
||||
0.82316,0.66301,0
|
||||
0.67339,0.64108,0
|
||||
1.0709,0.10015,0
|
||||
-0.046659,-0.57968,0
|
||||
-0.23675,-0.63816,0
|
||||
-0.15035,-0.36769,0
|
||||
-0.49021,-0.3019,0
|
||||
-0.46717,-0.13377,0
|
||||
-0.28859,-0.060673,0
|
||||
-0.61118,-0.067982,0
|
||||
-0.66302,-0.21418,0
|
||||
-0.59965,-0.41886,0
|
||||
-0.72638,-0.082602,0
|
||||
-0.83007,0.31213,0
|
||||
-0.72062,0.53874,0
|
||||
-0.59389,0.49488,0
|
||||
-0.48445,0.99927,0
|
||||
-0.0063364,0.99927,0
|
||||
0.63265,-0.030612,0
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user