49 lines
1.2 KiB
R
49 lines
1.2 KiB
R
# Load the survey responses analysed by this script.
#
# The data directory defaults to the original author's checkout and can be
# overridden with the SURVEY_DATA_DIR environment variable. The file is read
# through an explicit path, so the session's working directory is left alone
# (no setwd()).
data_dir <- Sys.getenv("SURVEY_DATA_DIR", unset = NA)
if (is.na(data_dir) || !nzchar(data_dir)) {
  data_dir <- "/home/sek1ro/git/public/lab/ds/25-1/r"
}

survey_path <- file.path(data_dir, "survey.csv")
if (!file.exists(survey_path)) {
  stop("Survey data not found: ", survey_path, call. = FALSE)
}
survey <- read.csv(survey_path)

# Fail early, with a readable message, if a column the rest of the script
# relies on is absent.
required_cols <- c("MYDEPV", "Price", "Income", "Age")
missing_cols <- setdiff(required_cols, names(survey))
if (length(missing_cols) > 0) {
  stop(
    "survey.csv is missing columns: ",
    paste(missing_cols, collapse = ", "),
    call. = FALSE
  )
}

head(survey)

# Indicator (dummy) columns for the price levels 20 and 30: 1 when Price
# equals the level, 0 otherwise. Rows at any other price get 0 in both.
survey[["price20"]] <- as.numeric(survey$Price == 20)
survey[["price30"]] <- as.numeric(survey$Price == 30)
head(survey)

# Constant column of ones.
survey[["one"]] <- 1

# Background on reading the intercept term of a GLM:
# https://stats.stackexchange.com/questions/48178/how-to-interpret-the-intercept-term-in-a-glm

# Logistic regression of MYDEPV on Income, Age and the two price indicators.
model <- glm(
  formula = MYDEPV ~ Income + Age + price20 + price30,
  family = binomial(link = "logit"),
  data = survey
)
summary(model)

# Quantiles of the model residuals (for a glm object the default residual
# type is the deviance residual).
# https://library.virginia.edu/data/articles/understanding-deviance-residuals
# Residuals are the differences between what we observe and what the model
# predicts. Residuals whose absolute value exceeds 3 lie in the tails of a
# standard normal distribution and usually indicate strain in the model.
quantile(resid(model))

# Convert fitted log-odds coefficients into percentage changes in the odds of
# MYDEPV, using (exp(beta) - 1) * 100.

# Income is a numeric term in the model, so this is the percentage change in
# the odds per one-unit increase in Income, other predictors held fixed.
beta_income <- coef(model)["Income"]
pct_income <- (exp(beta_income) - 1) * 100
pct_income

# price30 is a 0/1 indicator (Price == 30), not a per-unit price slope. Its
# coefficient is already the full log-odds shift relative to the reference
# price level (rows where price20 and price30 are both 0), so it must not be
# multiplied by a price difference.
beta_price30 <- coef(model)["price30"]
pct_price30 <- (exp(beta_price30) - 1) * 100
pct_price30

# Fitted odds for every row: exp() of the linear predictor (log-odds).
# NOTE: the column keeps its original name `odds_ratio`, but it holds odds,
# not a ratio of two odds.
survey$odds_ratio <- exp(predict(model, type = "link"))

# Fitted probabilities taken straight from the model. Computing them as
# odds / (1 + odds) yields NaN once exp() overflows to Inf for a very large
# linear predictor; type = "response" applies the inverse link safely.
survey$prediction <- predict(model, type = "response")
head(survey)

# Calibration check: for a logistic regression with an intercept, the fitted
# probabilities should sum to (approximately) the observed total of the
# response, assuming MYDEPV is coded 0/1.
sum(survey$MYDEPV)
sum(survey$prediction)

# Predicted probability of MYDEPV for one hypothetical respondent with
# Income 58 and Age 25 at the price-20 level (price20 = 1, price30 = 0).
new_person <- data.frame(Income = 58, Age = 25, price20 = 1, price30 = 0)

# type = "response" returns the prediction on the probability scale rather
# than as log-odds.
prob <- predict(model, newdata = new_person, type = "response")
prob