---
title: "Lab5: Association rules, Apriori"
author: "Vladislav Litvinov "
output:
  pdf_document:
    toc: true
---

# Histogram of item frequencies

```{r}
library(arules)
library(arulesViz)

# Directory holding the lab data. The file is addressed by its full path
# instead of calling setwd(), so the chunk leaves the working directory alone.
data_dir <- "/home/sek1ro/git/public/lab/ds/25-1/r"

# NOTE: `ts` masks stats::ts(); the name is kept because later chunks use it.
ts <- read.transactions(
  file.path(data_dir, "AssociationRules.csv"),
  sep = " ",
  rm.duplicates = TRUE
)
itemFrequencyPlot(ts, type = "absolute", topN = 10)

# Item frequencies sorted from the most to the least frequent item.
ift <- sort(itemFrequency(ts), decreasing = TRUE)
(most_frequent_item <- ift[1])
(max_ts_size <- max(size(ts)))
```

# Model training. Rule mining

```{r}
# All rules with support >= 0.01, without any confidence filtering.
rules <- apriori(ts, parameter = list(support = 0.01, confidence = 0))
length(rules)
plot(rules, jitter = 0)

# Same support threshold, minimum confidence 0.5.
rules50 <- apriori(ts, parameter = list(support = 0.01, confidence = 0.5))
length(rules50)
plot(rules50, jitter = 0)
```

# Applying the threshold manually: confidence = 0.5, plot comparison

```{r}
library(ggplot2)

# Scatter plot of support (x) against confidence (y), coloured by lift.
#
# q:      data frame with numeric columns `support`, `confidence` and `lift`.
# colors: colours of the lift gradient, from the lowest to the highest lift.
#
# Returns a ggplot object. Rows are ordered by lift so that points with a
# higher lift are drawn last and end up on top. Axes are fixed to
# support in [0, 0.5] and confidence in [0, 1] so plots are comparable.
asc <- function(q, colors = c("lightgray", "red")) {
  q <- q[order(q$lift), ]
  ggplot(q, aes(x = support, y = confidence, color = lift)) +
    geom_point() +
    ylim(0, 1) +
    xlim(0, 0.5) +
    theme_minimal() +
    scale_color_gradientn(
      colors = colors,
      name = "Lift",
      limits = range(q$lift)
    )
}

# Rules filtered by apriori itself.
quality50 <- as.data.frame(quality(rules50))
asc(quality50, colors = c("navy", "cyan"))

# Rules filtered by hand. `>=` is used because the apriori confidence
# parameter is a minimum, so rules at exactly 0.5 belong to the set as well.
rules_quality <- as.data.frame(quality(rules))
asc(subset(rules_quality, confidence >= 0.5))
```

# Top 3 rules by lift.
'Relation' graph and matrix

```{r}
plot(rules, measure = c("support", "lift"), shading = "confidence")
plot(rules)

# Keep only the rules with confidence above 0.8.
filt_rules <- rules[which(quality(rules)$confidence > 0.8)]

# Quality table ordered by decreasing lift; tail() therefore lists the
# 10 filtered rules with the lowest lift.
filt_quality <- as.data.frame(quality(filt_rules))
filt_quality <- filt_quality[order(-filt_quality$lift), ]
tail(filt_quality, 10)

plot(
  filt_rules,
  method = "matrix",
  shading = c("lift", "confidence"),
  engine = "grid"
)

# The three filtered rules with the highest lift, drawn as a graph.
top3_rules <- head(sort(filt_rules, by = "lift", decreasing = TRUE), 3)
plot(top3_rules, method = "graph")
```

# Random picking of train and test datasets

```{r}
# Random 80/20 split; the seed makes the split reproducible between runs.
set.seed(42)
n_ts <- length(ts)
train_idx <- sample(n_ts, size = floor(0.8 * n_ts))
train_set <- ts[train_idx]
test_set <- ts[setdiff(seq_len(n_ts), train_idx)]

train_rules <- apriori(
  train_set,
  parameter = list(support = 0.01, confidence = 0.5)
)

# reuse = FALSE forces the measures to be recomputed on the test
# transactions; with the default (reuse = TRUE) the values already stored in
# the rules (i.e. the training values) would be returned instead.
test_quality <- interestMeasure(
  train_rules,
  measure = c("support", "confidence", "lift", "coverage"),
  transactions = test_set,
  reuse = FALSE
)

# Compare the first rules (at most 10) on both data sets.
shown <- seq_len(min(10, length(train_rules)))
train_quality <- quality(train_rules)
comparison <- data.frame(
  train_support = train_quality$support[shown],
  test_support = test_quality$support[shown],
  train_lift = train_quality$lift[shown],
  test_lift = test_quality$lift[shown]
)
print(comparison)

# Plot for lift; the dashed diagonal marks equal train and test values.
plot(
  comparison$train_lift, comparison$test_lift,
  xlab = "train lift", ylab = "test lift", pch = 19
)
abline(0, 1, lty = 2)

# Plot for support
plot(
  comparison$train_support, comparison$test_support,
  xlab = "train support", ylab = "test support", pch = 19
)
abline(0, 1, lty = 2)
```