Files
lab/ds/25-1/r/5.R
2025-12-11 14:58:04 +03:00

87 lines
2.5 KiB
R

# Association-rule lab, part 1: load the transactions, look at item
# frequencies and mine two rule sets with apriori.

# NOTE(review): machine-specific absolute path; the relative file name used
# below is resolved against this directory.
setwd("/home/sek1ro/git/public/lab/ds/25-1/r")

# All packages are attached up front, in the same order as before
# (arules, arulesViz, ggplot2).
library(arules)
library(arulesViz)
library(ggplot2)

# Read the transaction file: " " is the field separator and duplicated
# items inside a transaction are removed.
ts <- read.transactions(
  "AssociationRules.csv",
  sep = " ",
  rm.duplicates = TRUE
)

# Bar chart of the ten most frequent items (absolute counts).
itemFrequencyPlot(ts, topN = 10, type = "absolute")

# Item frequencies in descending order; the surrounding parentheses make the
# assigned values print.
ift <- sort(itemFrequency(ts), decreasing = TRUE)
(most_frequent_item <- ift[1])
(max_ts_size <- max(size(ts)))

# Rule set 1: minimum support 0.01, no confidence threshold.
rules <- apriori(
  ts,
  parameter = list(support = 0.01, confidence = 0)
)
length(rules)
plot(rules, jitter = 0)

# Rule set 2: same minimum support, minimum confidence 0.5.
rules50 <- apriori(
  ts,
  parameter = list(support = 0.01, confidence = 0.5)
)
length(rules50)
plot(rules50, jitter = 0)
#' Scatter plot of rule quality: support (x) against confidence (y),
#' coloured by lift.
#'
#' @param q Data frame with numeric columns `support`, `confidence` and
#'   `lift` (one row per rule).
#' @param colors Colours of the lift gradient, from lowest to highest lift.
#' @param x_limits Length-two numeric vector with the x-axis (support) limits.
#'   The default reproduces the previously hard-coded `c(0, 0.5)`; widen it
#'   when rules with support above 0.5 must stay visible, because ggplot2
#'   drops points that fall outside the axis limits.
#' @param y_limits Length-two numeric vector with the y-axis (confidence)
#'   limits; defaults to the previously hard-coded `c(0, 1)`.
#' @return A ggplot object; it is drawn when printed.
asc <- function(q, colors = c("lightgray", "red"),
                x_limits = c(0, 0.5), y_limits = c(0, 1)) {
  stopifnot(
    is.data.frame(q),
    all(c("support", "confidence", "lift") %in% names(q))
  )

  # Rows are sorted by ascending lift, presumably so that the points with
  # the highest lift are drawn last and stay visible.
  q <- q[order(q$lift), , drop = FALSE]

  # Without any finite lift value (e.g. an empty data frame) min()/max()
  # would return Inf/-Inf with warnings; NULL lets ggplot2 choose the colour
  # limits itself.
  lift_limits <- if (any(is.finite(q$lift))) range(q$lift) else NULL

  ggplot(q, aes(x = support, y = confidence, color = lift)) +
    geom_point() +
    ylim(y_limits) +
    xlim(x_limits) +
    theme_minimal() +
    scale_color_gradientn(
      colors = colors,
      name = "Lift",
      limits = lift_limits
    )
}
# Association-rule lab, part 2: compare the rule sets on the
# support/confidence plane and inspect the high-confidence rules.
#
# The quality tables get their own names (quality50, quality_all,
# quality_conf80) so that no variable masks the arules accessor quality()
# and no name is reused for two different tables.

# Rules mined with minimum confidence 0.5.
quality50 <- as.data.frame(quality(rules50))
asc(quality50, colors = c("navy", "cyan"))

# The same view taken from the unrestricted rule set, keeping only rules
# whose confidence is strictly above 0.5.
quality_all <- as.data.frame(quality(rules))
asc(subset(quality_all, confidence > 0.5))

# Interactive scatter plots of the full rule set.
plot(
  rules,
  measure = c("support", "lift"),
  shading = "confidence",
  engine = "interactive"
)
plot(rules, engine = "interactive")

# Rules with confidence above 0.8.
filt_rules <- rules[which(quality(rules)$confidence > 0.8)]
quality_conf80 <- as.data.frame(quality(filt_rules))
quality_conf80 <- quality_conf80[order(-quality_conf80$lift), ]
# The table is sorted by descending lift, so tail() shows the ten rules with
# the LOWEST lift.
# NOTE(review): if the highest-lift rules were intended, use head() instead.
tail(quality_conf80, 10)

# Matrix view of the high-confidence rules, shaded by lift and confidence.
plot(
  filt_rules,
  method = "matrix",
  shading = c("lift", "confidence"),
  engine = "grid"
)

# Graph view of the three rules with the highest lift.
top3_rules <- head(sort(filt_rules, by = "lift", decreasing = TRUE), 3)
plot(top3_rules, method = "graph")
# Association-rule lab, part 3: hold-out validation. Rules are mined on the
# first 80% of the transactions and re-measured on the remaining 20%.

# The split point is derived from the data size instead of hard-coded
# indices; for 10000 transactions this is the original 1:8000 / 8001:10000.
n_total <- length(ts)
n_train <- floor(0.8 * n_total)
stopifnot(n_train >= 1, n_train < n_total)
train_set <- ts[seq_len(n_train)]
test_set <- ts[seq.int(n_train + 1, n_total)]

train_rules <- apriori(
  train_set,
  parameter = list(support = 0.01, confidence = 0.5)
)
if (length(train_rules) == 0L) {
  stop("apriori found no rules on the training set", call. = FALSE)
}

# reuse = FALSE is required here: by default interestMeasure() reuses the
# measures stored in the rules' quality slot (computed on train_set), which
# would make the "test" values plain copies of the training values instead
# of measurements on test_set.
test_quality <- interestMeasure(
  train_rules,
  measure = c("support", "confidence", "lift", "coverage"),
  transactions = test_set,
  reuse = FALSE
)

# Compare the first (up to) ten rules; seq_len() avoids NA padding when
# fewer than ten rules were mined.
train_quality <- quality(train_rules)
shown <- seq_len(min(10L, length(train_rules)))
comparison <- data.frame(
  train_support = train_quality$support[shown],
  test_support = test_quality$support[shown],
  train_lift = train_quality$lift[shown],
  test_lift = test_quality$lift[shown]
)
print(comparison)

# Plot for lift: points on the dashed diagonal have equal train and test
# values.
plot(
  comparison$train_lift, comparison$test_lift,
  xlab = "train lift",
  ylab = "test lift",
  pch = 19
)
abline(0, 1, lty = 2)

# Plot for support
plot(
  comparison$train_support, comparison$test_support,
  xlab = "train support",
  ylab = "test support",
  pch = 19
)
abline(0, 1, lty = 2)