# Naive Bayes experiments on nbtrain.csv using e1071::naiveBayes:
#   1. predict `income` from age, sex and educ;
#   2. predict `sex` from age, educ and income;
#   3. repeat (2) on a training sample balanced by sex.
library(e1071)

# NOTE(review): machine-specific absolute path; kept because later code may
# rely on the working directory being set here.
setwd("/home/sek1ro/git/public/lab/ds/25-1/r")

df <- read.csv("nbtrain.csv", stringsAsFactors = TRUE)

# Fail loudly instead of silently producing all-NA rows when the file is
# shorter than the fixed train/test split below expects.
stopifnot(nrow(df) >= 10010)
trdf <- df[1:9010, ]      # training rows
tedf <- df[9011:10010, ]  # held-out test rows
remove(df)

# Bayes' rule:
#   p(A|B) * p(B) = p(A, B) = p(B|A) * p(A)
#   p(A|B) = p(B|A) * p(A) / p(B)
#   posterior = conditional (likelihood) * prior / marginal

#' Print the overall misclassification rate of a confusion matrix.
#'
#' @param conf_mat Square confusion matrix (e.g. from `table()`).
#' @return The error rate (1 - sum of diagonal / sum of all cells), invisibly.
conf_tot <- function(conf_mat) {
  err <- 1 - (sum(diag(conf_mat)) / sum(conf_mat))
  # Terminate the line so following output does not run into the number.
  cat(err, "\n", sep = "")
  invisible(err)
}

#' Print the misclassification rate of each actual class, in percent.
#'
#' @param conf_mat Confusion matrix whose rows are the actual classes and
#'   whose columns carry the same names as its rows.
#' @return `NULL`, invisibly; called for its printed output.
conf_class <- function(conf_mat) {
  for (cls in rownames(conf_mat)) {
    # Share of this class's rows that were not predicted as the class itself.
    err <- 1 - (conf_mat[cls, cls] / sum(conf_mat[cls, ]))
    cat(sprintf("%s error %.2f%%\n", cls, err * 100))
  }
  invisible(NULL)
}

# ---- Model 1: income ~ age + sex + educ ----
nb <- naiveBayes(income ~ age + sex + educ, data = trdf, laplace = 1)
print(nb$apriori / sum(nb$apriori))  # class distribution as proportions
print(nb$tables)                     # per-predictor tables of the fitted model
pd <- predict(nb, tedf)
conf_mat <- table(Actual = tedf$income, Predicted = pd)
print(conf_mat)
conf_tot(conf_mat)
conf_class(conf_mat)

# ---- Model 2: sex ~ age + educ + income ----
nb <- naiveBayes(sex ~ age + educ + income, data = trdf, laplace = 1)
print(nb$apriori / sum(nb$apriori))
print(nb$tables)
pd <- predict(nb, tedf)
conf_mat <- table(Actual = tedf$sex, Predicted = pd)
print(conf_mat)
conf_tot(conf_mat)
conf_class(conf_mat)

# ---- Model 3: sex model trained on a sample balanced by sex ----
male <- trdf[trdf$sex == "M", ]
female <- trdf[trdf$sex == "F", ]

#' Fit and evaluate the sex model on a random, sex-balanced training sample.
#'
#' Draws `n` rows without replacement from each of the global `male` and
#' `female` data frames, fits `sex ~ age + educ + income`, and prints the
#' class proportions, the model tables, the confusion matrix on the global
#' `tedf`, and the overall and per-class error rates.
#'
#' @param n Number of rows to sample from each sex (default 3500). `sample.int`
#'   raises an error if either group has fewer than `n` rows.
#' @return The confusion matrix on `tedf`, invisibly.
nbrandom <- function(n = 3500) {
  mdf <- male[sample.int(nrow(male), n), ]
  fdf <- female[sample.int(nrow(female), n), ]
  mfdf <- rbind(mdf, fdf)

  mfnb <- naiveBayes(sex ~ age + educ + income, data = mfdf, laplace = 1)

  # Values are not auto-printed inside a function, so print them explicitly.
  print(mfnb$apriori / sum(mfnb$apriori))
  print(mfnb$tables)

  mfpd <- predict(mfnb, tedf)
  mfconf_mat <- table(Actual = tedf$sex, Predicted = mfpd)
  print(mfconf_mat)
  conf_tot(mfconf_mat)
  conf_class(mfconf_mat)
  invisible(mfconf_mat)
}

# Clock-based seed: every run draws a different balanced sample. The integer
# coercion is what set.seed() applies anyway; it is spelled out for clarity.
set.seed(as.integer(Sys.time()))
nbrandom()