# Listing metadata: R script, 87 lines, 2.5 KiB
# Machine-specific working directory; the CSV below is resolved relative to it.
setwd("/home/sek1ro/git/public/lab/ds/25-1/r")

library(arules)
library(arulesViz)

# Load the transactions from a space-separated file. rm.duplicates = TRUE asks
# read.transactions() to drop items repeated within the same transaction.
ts <- read.transactions(
  file = "AssociationRules.csv",
  sep = " ",
  rm.duplicates = TRUE
)

# Bar chart of the ten most frequent items, using absolute counts.
itemFrequencyPlot(ts, topN = 10, type = "absolute")

# Per-item frequencies, ordered from most to least frequent.
item_freq <- itemFrequency(ts)
ift <- sort(item_freq, decreasing = TRUE)

# Wrapping an assignment in parentheses also prints the assigned value.
(most_frequent_item <- ift[1])

# Size of the largest transaction.
transaction_sizes <- size(ts)
(max_ts_size <- max(transaction_sizes))

# Rule set mined with support = 0.01 and confidence = 0 (no confidence cut).
all_params <- list(support = 0.01, confidence = 0)
rules <- apriori(data = ts, parameter = all_params)
length(rules)
plot(rules, jitter = 0)

# Same support threshold, but with confidence = 0.5.
conf50_params <- list(support = 0.01, confidence = 0.5)
rules50 <- apriori(data = ts, parameter = conf50_params)
length(rules50)
plot(rules50, jitter = 0)

library(ggplot2)

#' Scatter plot of rule quality: support vs. confidence, colored by lift
#'
#' Rows are sorted by ascending lift before plotting, so rows with the highest
#' lift come last in the data handed to `geom_point()`.
#'
#' @param q Data frame with numeric columns `support`, `confidence` and
#'   `lift` (for example the result of `quality()` on a rule set).
#' @param colors Colors passed to `scale_color_gradientn()` for the lift
#'   gradient.
#' @param x_limits,y_limits Length-2 numeric axis limits for support and
#'   confidence. They are applied with `xlim()` / `ylim()`, which remove
#'   observations outside the range instead of merely zooming.
#' @return A ggplot object; print it to render the plot.
asc <- function(q, colors = c("lightgray", "red"),
                x_limits = c(0, 0.5), y_limits = c(0, 1)) {
  # Fail early with a clear message instead of an opaque error further down.
  if (!is.data.frame(q)) {
    stop("`q` must be a data frame", call. = FALSE)
  }
  missing_cols <- setdiff(c("support", "confidence", "lift"), names(q))
  if (length(missing_cols) > 0) {
    stop(
      "`q` is missing required column(s): ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  q <- q[order(q$lift), , drop = FALSE]

  # min()/max() on an empty column warn and yield infinite limits; with no
  # rows, let the color scale fall back to its default limits instead.
  lift_limits <- if (nrow(q) > 0) range(q$lift) else NULL

  ggplot(q, aes(x = support, y = confidence, color = lift)) +
    geom_point() +
    ylim(y_limits) +
    xlim(x_limits) +
    theme_minimal() +
    scale_color_gradientn(
      colors = colors,
      name = "Lift",
      limits = lift_limits
    )
}

# Quality measures of rules50, drawn with a navy-to-cyan lift gradient.
quality50 <- as.data.frame(quality(rules50))
asc(quality50, colors = c("navy", "cyan"))

# Quality measures of the full rule set, restricted to confidence > 0.5 and
# drawn with the default palette. The data frame gets its own name so it does
# not shadow the quality() accessor.
all_quality <- as.data.frame(quality(rules))
asc(subset(all_quality, confidence > 0.5))

# Interactive scatter plots of the full rule set: first support vs. lift
# shaded by confidence, then the plot method's default measures.
plot(
  rules,
  measure = c("support", "lift"),
  shading = "confidence",
  engine = "interactive"
)
plot(rules, engine = "interactive")

# Keep only the rules whose confidence exceeds 0.8.
high_conf_idx <- which(quality(rules)$confidence > 0.8)
filt_rules <- rules[high_conf_idx]

# Quality table of the filtered rules, sorted by descending lift. Because of
# that ordering, tail() prints the ten rows with the lowest lift.
filt_quality <- as.data.frame(quality(filt_rules))
filt_quality <- filt_quality[order(-filt_quality$lift), ]
tail(filt_quality, 10)

# Matrix view of the filtered rules, shaded by lift and confidence.
plot(
  filt_rules,
  method = "matrix",
  shading = c("lift", "confidence"),
  engine = "grid"
)

# Graph view of the three filtered rules with the highest lift.
rules_by_lift <- sort(filt_rules, by = "lift", decreasing = TRUE)
top3_rules <- head(rules_by_lift, 3)
plot(top3_rules, method = "graph")

# Hold-out check: mine rules on the first 8000 transactions, then re-measure
# the same rules on transactions 8001-10000.
n_train <- 8000
n_total <- 10000
# Fail with a clear message if the data set is smaller than the fixed split.
stopifnot(length(ts) >= n_total)

train_set <- ts[seq_len(n_train)]
test_set <- ts[(n_train + 1):n_total]

train_rules <- apriori(
  train_set,
  parameter = list(support = 0.01, confidence = 0.5)
)

# reuse = FALSE is essential: with the default reuse = TRUE, interestMeasure()
# takes support/confidence/lift from the rules' stored quality slot (computed
# on the training data) rather than counting them in `test_set`, which would
# make the train and test columns below identical.
test_quality <- interestMeasure(
  train_rules,
  measure = c("support", "confidence", "lift", "coverage"),
  transactions = test_set,
  reuse = FALSE
)

# Compare the first (up to) ten rules. head() avoids the NA padding that
# [1:10] produces when fewer than ten rules were mined.
train_quality <- quality(train_rules)
n_shown <- 10
comparison <- data.frame(
  train_support = head(train_quality$support, n_shown),
  test_support = head(test_quality$support, n_shown),
  train_lift = head(train_quality$lift, n_shown),
  test_lift = head(test_quality$lift, n_shown)
)

print(comparison)

# Train vs. test lift; the dashed line is y = x (perfect agreement).
plot(
  comparison$train_lift, comparison$test_lift,
  xlab = "train lift",
  ylab = "test lift",
  pch = 19
)
abline(0, 1, lty = 2)

# Plot for support: train vs. test, with the same y = x reference line.
plot(
  comparison$train_support, comparison$test_support,
  xlab = "train support",
  ylab = "test support",
  pch = 19
)
abline(0, 1, lty = 2)