renaming
This commit is contained in:
78
5/data science/r/8.rmd
Normal file
78
5/data science/r/8.rmd
Normal file
@ -0,0 +1,78 @@
|
||||
---
|
||||
title: "Lab 8: Naive Bayes classifier"
|
||||
author: "Vladislav Litvinov <vlad@sek1ro>"
|
||||
output:
  pdf_document:
    # toc_float is an html_document-only option that pdf_document() does not
    # accept; toc is the table-of-contents switch pdf_document() provides.
    toc: TRUE
|
||||
---
|
||||
# Data splitting (test, train datasets)
|
||||
```{r}
|
||||
# Attach e1071 first so a missing package fails before any data is read.
library(e1071)

# Directory holding nbtrain.csv. The path is handed to read.csv() directly
# instead of calling setwd(): knitr restores the working directory after each
# chunk and warns about the change, so setwd() is not a dependable way to
# locate input files and leaks a side effect into interactive sessions.
data_dir = '/home/sek1ro/git/public/lab/ds/25-1/r'
df = read.csv(file.path(data_dir, "nbtrain.csv"), stringsAsFactors = TRUE)

# The split below uses fixed row positions; stop with an error instead of
# silently producing all-NA rows when the file is shorter than expected.
stopifnot(nrow(df) >= 10010)

# Rows 1..9010 form the training set, rows 9011..10010 the test set.
trdf = df[1:9010,]
tedf = df[9011:10010,]

# The full data frame is not used after the split.
remove(df)
|
||||
```
|
||||
# Model training
|
||||
```{r}
|
||||
# Fit a naive Bayes model for income using age, sex and educ as predictors,
# with laplace = 1 smoothing.
nb = naiveBayes(
  income ~ age + sex + educ,
  data = trdf,
  laplace = 1
)

# Bayes' rule, for reference:
#   p(A|B) * p(B) = p(AB) = p(B|A) * p(A)
#   p(A|B) = p(B|A) * p(A) / p(B)
#   posterior = conditional * prior / marginal

# The model's apriori entries rescaled so that they sum to 1.
prop.table(nb$apriori)

# Per-predictor tables stored in the fitted model.
nb$tables
|
||||
```
|
||||
# Model testing. Total error rate and error rate by class
|
||||
```{r}
|
||||
# Predict a class for every row of the test set with the fitted model.
pd = predict(nb, tedf)

# Cross-tabulate the true income values (rows) against the predictions
# (columns), then display the table.
conf_mat = table(Actual = tedf$income, Predicted = pd)
print(conf_mat)
|
||||
|
||||
# Print the overall misclassification rate of a confusion matrix.
#
# Args:
#   conf_mat: square matrix or table of counts whose diagonal holds the
#     correctly classified cases.
#
# Returns:
#   The error rate (1 - sum of the diagonal / sum of all cells), invisibly,
#   so callers can reuse the number without it being printed twice.
conf_tot = function(conf_mat) {
  err = 1 - (sum(diag(conf_mat)) / sum(conf_mat))
  # Terminate the line explicitly: cat() adds no newline on its own, so the
  # next thing written to the console would otherwise be glued to this number.
  cat(err, "\n", sep = "")
  invisible(err)
}
|
||||
|
||||
# Print one line per class with that class's misclassification rate.
#
# Args:
#   conf_mat: confusion matrix or table indexed by class name on both
#     dimensions; rows are looked up by name, and the same name is used to
#     pick the matching column.
#
# For every row name the rate is 1 - (cell on the diagonal / row total),
# written as a percentage with two decimals.
conf_class = function(conf_mat) {
  for (cls in rownames(conf_mat)) {
    hits = conf_mat[cls, cls]
    row_total = sum(conf_mat[cls, ])
    miss_rate = 1 - (hits / row_total)
    cat(sprintf("%s error %.2f%%\n", cls, miss_rate * 100))
  }
}
|
||||
|
||||
# Error rates of the income model on the test set.
conf_tot(conf_mat)
conf_class(conf_mat)

# Refit with sex as the target and age, educ and income as predictors.
nb = naiveBayes(
  sex ~ age + educ + income,
  data = trdf,
  laplace = 1
)
prop.table(nb$apriori)
nb$tables

# Score the new model on the same test rows and report its error rates.
pd = predict(nb, tedf)
conf_mat = table(Actual = tedf$sex, Predicted = pd)
print(conf_mat)
conf_tot(conf_mat)
conf_class(conf_mat)
|
||||
```
|
||||
# Equal-sized samples of male and female records
|
||||
```{r}
|
||||
# Partition the training rows by the value of the sex column.
male = trdf[trdf[["sex"]] == "M", ]
female = trdf[trdf[["sex"]] == "F", ]
|
||||
|
||||
# Train and evaluate a sex classifier on an equal-sized sample of each sex.
#
# Draws `n` rows without replacement from each of the global `male` and
# `female` data frames, fits naiveBayes(sex ~ age + educ + income) on the
# combined sample, and prints the normalised apriori values, the model
# tables, the confusion matrix on the global `tedf`, and the total and
# per-class error rates.
#
# Args:
#   n: number of rows to draw from each subset (default 3500, the size that
#     was previously hard-coded).
nbrandom = function(n = 3500) {
  # seq_len() instead of 1:nrow(): 1:0 would yield c(1, 0) for an empty frame.
  mdf = male[sample(seq_len(nrow(male)), n), ]
  fdf = female[sample(seq_len(nrow(female)), n), ]
  mfdf = rbind(mdf, fdf)

  mfnb = naiveBayes(sex ~ age + educ + income, data = mfdf, laplace = 1)

  # Inside a function body a bare expression is evaluated but never
  # auto-printed, so every intermediate result is printed explicitly.
  print(mfnb$apriori / sum(mfnb$apriori))
  print(mfnb$tables)

  mfpd = predict(mfnb, tedf)
  mfconf_mat = table(Actual = tedf$sex, Predicted = mfpd)
  print(mfconf_mat)

  conf_tot(mfconf_mat)
  conf_class(mfconf_mat)
}
|
||||
|
||||
# Seed the random number generator from the current clock time, so each run
# draws a different sample, then run one resampling experiment.
set.seed(as.integer(Sys.time()))
nbrandom()
|
||||
|
||||
```
|
||||
Reference in New Issue
Block a user