renaming
This commit is contained in:
226
5/data science/r/.Rhistory
Normal file
226
5/data science/r/.Rhistory
Normal file
@ -0,0 +1,226 @@
|
||||
a <- 3
|
||||
v <- c(1,2,3,4,5)
|
||||
q()
|
||||
a <- 3
|
||||
v<-c(1,2,3,4,5)
|
||||
read.table("git/public/lab/ds/25-1/r2/zipIncome.txt")
|
||||
db = read.table("git/public/lab/ds/25-1/r2/zipIncome.txt")
|
||||
# Build a factor from character ratings using an explicit level set.
# NOTE: none of the ratings ("a", "b", "c") occur in `levels` (1, 2, 3), so
# every element of `f` is NA.
levels <- c(1, 2, 3)
ratings <- c("a", "b", "c")
f <- factor(ratings, levels)
print(f)
# R has no method-call syntax: `f.size()` looks up a function literally named
# "f.size", which does not exist. The element count comes from length().
print(length(f))
print(f)
# levels.default() cannot be called without its argument; ask the factor for
# its levels through the levels() generic instead.
print(levels(f))
|
||||
print(levels[0])
|
||||
print(levels[1])
|
||||
print(ratings[1])
|
||||
print(ratings[2])
|
||||
print(ratings[0])
|
||||
print(ratings[4])
|
||||
print(ratings[3])
|
||||
# Rebuild the factor from four observed ratings and their three distinct
# levels. A vector literal needs c(); a bare parenthesised list such as
# ("a", "b") is a parse error. factor() also rejects duplicated levels, so
# the repeated values belong in the data vector and the unique values in
# `levels`.
ratings <- c("a", "b", "a", "c")
levels <- c("a", "b", "c")
f <- factor(ratings, levels)
|
||||
f[0]
|
||||
f[1]
|
||||
f[2]
|
||||
f[3]
|
||||
f[4]
|
||||
f[5]
|
||||
# Built-in constant pi.
pi
# R has no built-in constant `e`, and exp() requires an argument;
# Euler's number is obtained as exp(1).
exp(1)
|
||||
# Two integer sequences of equal length and their element-wise product.
v <- 1:10
w <- 15:24
x <- v * w
|
||||
db[1:3]
|
||||
db[1:2]
|
||||
db[1]
|
||||
db[1,]
|
||||
db[1:2,]
|
||||
db[2,]
|
||||
db[1,]
|
||||
db[,1]
|
||||
db$state
|
||||
db
|
||||
db$V1
|
||||
db$q
|
||||
db[,1]
|
||||
db[1,]
|
||||
db$zip_prefixes
|
||||
db$name
|
||||
db["zip_prefixes"]
|
||||
# Create a one-column data frame and walk through the ways of indexing it.
df <- data.frame(name = c("a"))
df
df$name    # column as a vector
df[]       # whole data frame
df[1]      # first column, still a data frame
df[, 1]    # first column, dropped to a vector
# df[, 2] was removed: the frame has a single column, so selecting a second
# one fails with "undefined columns selected".
df[1, ]    # first row
class(df)
# The original typed `fd`, `printd` and `print(d)`, none of which are defined
# names; printing `df` is presumably what was intended.
print(df)
|
||||
summary(x)
|
||||
var(x)
|
||||
sd(x)
|
||||
range(x)
|
||||
plot(x)
|
||||
hist(x)
|
||||
str(x)
|
||||
help(plot)
|
||||
setwd('/home/sek1ro/git/public/lab/ds/25-1/r')
|
||||
survey <- read.csv('survey.csv')
|
||||
head(survey)
|
||||
# Indicator columns: 1 where Price equals 20 (resp. 30), 0 otherwise.
survey$price20 <- as.numeric(survey$Price == 20)
survey$price30 <- as.numeric(survey$Price == 30)
head(survey)

# Constant column of ones.
survey$one <- 1

# Logistic regression of MYDEPV on income, age and the two price indicators.
model <- glm(
  formula = MYDEPV ~ Income + Age + price20 + price30,
  family = binomial(link = "logit"),
  data = survey
)
summary(model)
quantile(residuals(model))
|
||||
# Load the survey data and split it by row position into a training part
# (rows 1-600) and a held-out part (rows 601-750).
# The stray predict() call that used to sit here referenced `tree` and
# `test_df` before either was created, and the load/split sequence was
# repeated twice; both are collapsed into one pass. The file is read through
# an explicit path so the session's working directory is not changed.
data_dir <- '/home/sek1ro/git/public/lab/ds/25-1/r'
survey <- read.csv(file.path(data_dir, 'survey.csv'))
train_df <- survey[1:600, ]
test_df <- survey[601:750, ]
|
||||
library(rpart)

# Classification tree for MYDEPV, split on information gain, with 3-fold
# cross-validation. The trailing commas before the closing parentheses were
# removed: an empty trailing argument is still matched positionally to the
# next unfilled formal of rpart() / rpart.control(), which is fragile.
tree <- rpart(
  MYDEPV ~ Price + Income + Age,
  data = train_df,
  method = "class",
  parms = list(split = "information"),
  control = rpart.control(xval = 3)
)
printcp(tree)
|
||||
library(rpart.plot)

# Draw the fitted tree.
# extra = 106 combines two display options:
#   6   class models: the probability of the second class only
#       (useful for binary responses);
#   100 display the percentage of observations in the node.
# The trailing comma after the last argument was removed so no formal of
# rpart.plot() is bound to an empty argument.
rpart.plot(
  tree,
  type = 1,
  extra = 106,
  fallen.leaves = TRUE
)
|
||||
# Predicted classes on the training rows, cross-tabulated against the truth.
pred_class <- predict(tree, train_df, type = "class")
conf_mat <- table(Actual = train_df$MYDEPV, Predicted = pred_class)
conf_mat

# Rows are the actual classes, so diagonal / row total is the share of each
# actual class that was predicted correctly.
print(diag(conf_mat) / rowSums(conf_mat))
# Overall misclassification rate.
print(1 - sum(diag(conf_mat)) / sum(conf_mat))

# ROC curve and AUC from the second column of the class-probability matrix.
pred_prob <- predict(tree, train_df, type = "prob")[, 2]
library(ROCR)
pred <- prediction(pred_prob, train_df$MYDEPV)
perf <- performance(pred, "tpr", "fpr")
plot(perf)
abline(a = 0, b = 1)  # diagonal reference line
auc_perf <- performance(pred, measure = "auc")
auc_perf@y.values[[1]]
|
||||
# Evaluate the tree on the held-out rows.
pred_test <- predict(tree, test_df, type = "class")
conf_mat_test <- table(Actual = test_df$MYDEPV, Predicted = pred_test)
conf_mat_test
# Per-class accuracy must come from the test confusion matrix; the original
# line re-printed the training matrix `conf_mat` here.
print(diag(conf_mat_test) / rowSums(conf_mat_test))
|
||||
# Same model as `tree`, but split on the Gini index and with rpart's default
# control settings.
tree_gini <- rpart(
  MYDEPV ~ Price + Income + Age,
  data = train_df,
  method = "class",
  parms = list(split = "gini")
)
printcp(tree_gini)

# Trailing comma after the last argument removed so no formal of rpart.plot()
# is bound to an empty argument.
rpart.plot(
  tree_gini,
  type = 1,
  extra = 106,
  fallen.leaves = TRUE
)
|
||||
# Pick the complexity parameter from the cptable row with the smallest
# `xerror` value, then prune the Gini tree to it.
# The earlier attempts referenced `tree_dini` (a typo for `tree_gini`, so an
# undefined object) and indexed `tree$cptable` with a row number taken from a
# different tree's table. The row index and the CP value must come from the
# same cptable.
cp_table <- tree_gini$cptable
best_cp <- cp_table[which.min(cp_table[, "xerror"]), "CP"]
best_cp
pruned_tree <- prune(tree_gini, cp = best_cp)
printcp(pruned_tree)
rpart.plot(pruned_tree)
|
||||
# Evaluate the information-gain tree on the held-out rows.
pred_test <- predict(tree, test_df, type = "class")
conf_mat_test <- table(Actual = test_df$MYDEPV, Predicted = pred_test)
conf_mat_test
print(diag(conf_mat_test) / rowSums(conf_mat_test))

# Fit the Gini-split tree on the training rows.
tree_gini <- rpart(
  MYDEPV ~ Price + Income + Age,
  data = train_df,
  method = "class",
  parms = list(split = "gini")
)
printcp(tree_gini)
# Trailing comma after the last argument removed so no formal of rpart.plot()
# is bound to an empty argument.
rpart.plot(
  tree_gini,
  type = 1,
  extra = 106,
  fallen.leaves = TRUE
)

# Prune at the CP of the cptable row with the smallest `xerror`.
best_cp <- tree_gini$cptable[which.min(tree_gini$cptable[, "xerror"]), "CP"]
best_cp
pruned_tree <- prune(tree_gini, cp = best_cp)
printcp(pruned_tree)
rpart.plot(pruned_tree)

# Evaluate the pruned tree on the held-out rows.
pruned_pred <- predict(pruned_tree, test_df, type = "class")
pruned_conf_mat <- table(Actual = test_df$MYDEPV, Predicted = pruned_pred)
pruned_conf_mat
print(diag(pruned_conf_mat) / rowSums(pruned_conf_mat))
|
||||
# Evaluate the information-gain tree on the held-out rows.
pred_test <- predict(tree, test_df, type = "class")
conf_mat_test <- table(Actual = test_df$MYDEPV, Predicted = pred_test)
conf_mat_test
print(diag(conf_mat_test) / rowSums(conf_mat_test))

# Fit the Gini-split tree on the training rows.
tree_gini <- rpart(
  MYDEPV ~ Price + Income + Age,
  data = train_df,
  method = "class",
  parms = list(split = "gini")
)
printcp(tree_gini)
# Trailing comma after the last argument removed so no formal of rpart.plot()
# is bound to an empty argument.
rpart.plot(
  tree_gini,
  type = 1,
  extra = 106,
  fallen.leaves = TRUE
)

# Prune at the CP of the cptable row with the smallest `xerror`.
best_cp <- tree_gini$cptable[which.min(tree_gini$cptable[, "xerror"]), "CP"]
best_cp
pruned_tree <- prune(tree_gini, cp = best_cp)
printcp(pruned_tree)
rpart.plot(pruned_tree)

# Evaluate the pruned tree on the held-out rows.
pruned_pred <- predict(pruned_tree, test_df, type = "class")
pruned_conf_mat <- table(Actual = test_df$MYDEPV, Predicted = pruned_pred)
pruned_conf_mat
print(diag(pruned_conf_mat) / rowSums(pruned_conf_mat))
|
||||
Reference in New Issue
Block a user