# Files
# lab/ds/25-1/r/8.R
# 2025-12-11 14:58:04 +03:00
#
# 66 lines
# 1.5 KiB
# R

library(e1071)

# Work from the lab directory so the relative data path below resolves.
setwd('/home/sek1ro/git/public/lab/ds/25-1/r')

# Load the data set (strings become factors) and split it by row position:
# rows 1-9010 form the training set, rows 9011-10010 the test set.
df = read.csv("nbtrain.csv", stringsAsFactors = TRUE)
trdf = df[seq_len(9010), ]
tedf = df[9011:10010, ]
rm(df)

# Bayes' rule:
#   p(A|B) * p(B) = p(AB) = p(B|A) * p(A)
#   p(A|B) = p(B|A) * p(A) / p(B)
#   posterior = conditional * prior / marginal

# Model 1: predict income from age, sex and education (laplace = 1).
nb = naiveBayes(income ~ age + sex + educ, data = trdf, laplace = 1)

# Class counts of the response, normalised so that they sum to 1.
prop.table(nb$apriori)

# Per-predictor tables stored in the fitted model.
nb$tables

# Classify the test rows and cross-tabulate actual vs. predicted income.
pd = predict(nb, tedf)
conf_mat = table(Actual = tedf$income, Predicted = pd)
conf_mat
# Print the overall misclassification rate of a confusion matrix.
#
# The rate is 1 - sum(diagonal) / sum(all cells); this is only meaningful
# when rows and columns list the same classes in the same order.
#
# Args:
#   conf_mat: square numeric matrix or table of counts.
#
# Returns:
#   The error rate, invisibly, so callers can reuse it without extra output.
conf_tot = function(conf_mat) {
  err = 1 - (sum(diag(conf_mat)) / sum(conf_mat))
  # Terminate the line so that whatever is printed next starts on its own
  # line instead of being appended to the number.
  cat(err, "\n", sep = "")
  invisible(err)
}
# Print the per-class error rate of a confusion matrix, one line per class.
#
# For every row name, the error is 1 - (cell where row and column carry that
# name) / (row total), reported as a percentage with two decimals.
#
# Args:
#   conf_mat: matrix or table of counts whose column names include every
#             row name (cells are looked up by name, not by position).
conf_class = function(conf_mat) {
  for (label in rownames(conf_mat)) {
    correct = conf_mat[label, label]
    row_total = sum(conf_mat[label, ])
    share_wrong = 1 - (correct / row_total)
    cat(sprintf("%s error %.2f%%\n", label, share_wrong * 100))
  }
}
# Report overall and per-class error for the confusion matrix built above.
conf_tot(conf_mat)
conf_class(conf_mat)

# Model 2: predict sex from age, education and income (laplace = 1).
nb = naiveBayes(sex ~ age + educ + income, data = trdf, laplace = 1)

# Class counts of the response, normalised so that they sum to 1.
prop.table(nb$apriori)

# Per-predictor tables stored in the fitted model.
nb$tables

# Classify the test rows and cross-tabulate actual vs. predicted sex.
pd = predict(nb, tedf)
conf_mat = table(Actual = tedf$sex, Predicted = pd)
conf_mat

conf_tot(conf_mat)
conf_class(conf_mat)

# Split the training rows by sex; nbrandom() samples from these two frames.
male = trdf[trdf[["sex"]] == "M", ]
female = trdf[trdf[["sex"]] == "F", ]
# Fit and evaluate the sex classifier on a class-balanced random sample.
#
# Draws `n` rows without replacement from each of the global data frames
# `male` and `female`, stacks them, fits a naive Bayes model for
# sex ~ age + educ + income (laplace = 1), and evaluates it on the global
# test set `tedf`. Prints the normalised class counts, the model tables, the
# confusion matrix, and the overall and per-class error rates.
#
# Args:
#   n: number of rows to sample from each sex (default 3500). sampling fails
#      with an error if either frame has fewer than `n` rows.
nbrandom = function(n = 3500) {
  mdf = male[sample.int(nrow(male), n), ]
  fdf = female[sample.int(nrow(female), n), ]
  mfdf = rbind(mdf, fdf)

  mfnb = naiveBayes(sex ~ age + educ + income, data = mfdf, laplace = 1)

  # Values are not auto-printed inside a function body, so each result that
  # should appear in the output is printed explicitly.
  print(mfnb$apriori / sum(mfnb$apriori))
  print(mfnb$tables)

  mfpd = predict(mfnb, tedf)
  mfconf_mat = table(Actual = tedf$sex, Predicted = mfpd)
  print(mfconf_mat)

  conf_tot(mfconf_mat)
  conf_class(mfconf_mat)
}
# Seed the RNG from the current clock (whole seconds since the epoch), so
# every run draws a different balanced sample, then run the experiment.
set.seed(as.integer(Sys.time()))
nbrandom()