66 lines
1.5 KiB
R
66 lines
1.5 KiB
R
# Load the lab data set and split it into a training and a test part.
# NOTE(review): the working directory is a machine-specific absolute path,
# so the script only runs where this directory exists.
setwd("/home/sek1ro/git/public/lab/ds/25-1/r")

# Character columns are converted to factors while reading.
df <- read.csv(file = "nbtrain.csv", stringsAsFactors = TRUE)

# Rows 1..9010 become the training set, rows 9011..10010 the test set.
# NOTE(review): assumes the file holds at least 10010 rows -- confirm.
trdf <- df[seq_len(9010), ]
tedf <- df[seq(9011, 10010), ]

# The full data frame is no longer needed once both subsets exist.
rm(df)

library(e1071)
|
|
|
|
# Naive Bayes classifier for income from age, sex and education,
# fitted on the training set with laplace = 1.
nb <- naiveBayes(
  income ~ age + sex + educ,
  data = trdf,
  laplace = 1
)

# Bayes' theorem:
#   p(A|B) * p(B) = p(AB) = p(B|A) * p(A)
#   p(A|B) = p(B|A) * p(A) / p(B)
#   posterior = likelihood * prior / marginal

# Stored class distribution, normalised so that it sums to 1.
prop.table(nb$apriori)

# Per-predictor tables kept in the fitted model.
nb$tables

# Classify the test rows and cross-tabulate actual against predicted income.
pd <- predict(nb, newdata = tedf)
conf_mat <- table(Actual = tedf$income, Predicted = pd)
conf_mat
|
|
|
|
# Print the overall misclassification rate of a confusion matrix.
#
# conf_mat: matrix or table of counts; the diagonal is taken as the
#   correctly classified cases, so rows and columns are expected to list
#   the classes in the same order.
#
# The rate is 1 - (sum of the diagonal / sum of all cells). It is written
# on a line of its own, so output printed afterwards does not run onto the
# same line, and it is returned invisibly so callers can reuse the number.
conf_tot <- function(conf_mat) {
  err <- 1 - sum(diag(conf_mat)) / sum(conf_mat)
  cat(err, "\n", sep = "")
  invisible(err)
}
|
|
|
|
# Print the misclassification rate of every actual class, one line per class.
#
# conf_mat: confusion matrix or table with actual classes in rows and
#   predicted classes in columns; cells are looked up by class name, so
#   every row name must also be a column name.
#
# For each row the rate is 1 - (correct count / row total), shown as a
# percentage with two decimals.
conf_class <- function(conf_mat) {
  for (label in rownames(conf_mat)) {
    row_total <- sum(conf_mat[label, ])
    hits <- conf_mat[label, label]
    miss_rate <- 1 - (hits / row_total)
    cat(sprintf("%s error %.2f%%\n", label, miss_rate * 100))
  }
}
|
|
|
|
# Error rates of the income model on the test set: overall, then per class.
conf_tot(conf_mat)
conf_class(conf_mat)

# Second model: predict sex from age, education and income,
# again fitted on the training set with laplace = 1.
nb <- naiveBayes(
  sex ~ age + educ + income,
  data = trdf,
  laplace = 1
)

# Stored class distribution normalised to sum to 1, then the model tables.
prop.table(nb$apriori)
nb$tables

# Confusion matrix and error rates of the sex model on the test set.
pd <- predict(nb, newdata = tedf)
conf_mat <- table(Actual = tedf$sex, Predicted = pd)
conf_mat
conf_tot(conf_mat)
conf_class(conf_mat)
|
|
|
|
|
|
|
|
# Training rows split by the value of the sex column ("M" / "F").
male <- trdf[trdf[["sex"]] == "M", ]
female <- trdf[trdf[["sex"]] == "F", ]
|
|
|
|
# Fit and evaluate a Naive Bayes model for sex on a sex-balanced random sample.
#
# n: number of rows drawn without replacement from each of the global data
#   frames `male` and `female`; defaults to 3500, the size used originally.
#
# Reads the globals `male`, `female` and `tedf`, and reports the error rates
# through conf_tot() and conf_class(). The class proportions, the model
# tables and the confusion matrix are passed to print() explicitly, because
# values are not auto-printed inside a function body.
# Returns the confusion matrix invisibly.
nbrandom <- function(n = 3500) {
  # Fail early with a clear message instead of an error from sample().
  if (n > nrow(male) || n > nrow(female)) {
    stop("n exceeds the number of available male or female training rows",
         call. = FALSE)
  }

  # Equal-sized random draws from both groups, stacked into one training set.
  mdf <- male[sample(seq_len(nrow(male)), n), ]
  fdf <- female[sample(seq_len(nrow(female)), n), ]
  mfdf <- rbind(mdf, fdf)

  mfnb <- naiveBayes(sex ~ age + educ + income, data = mfdf, laplace = 1)
  print(mfnb$apriori / sum(mfnb$apriori))
  print(mfnb$tables)

  # Evaluate on the test set.
  mfpd <- predict(mfnb, tedf)
  mfconf_mat <- table(Actual = tedf$sex, Predicted = mfpd)
  print(mfconf_mat)

  conf_tot(mfconf_mat)
  conf_class(mfconf_mat)
  invisible(mfconf_mat)
}
|
|
|
|
# Seed the random number generator from the current clock time,
# then run the balanced-sample experiment once.
set.seed(seed = Sys.time())
nbrandom()
|
|
|