renaming
This commit is contained in:
103
5/data science/r/5.rmd
Normal file
103
5/data science/r/5.rmd
Normal file
@ -0,0 +1,103 @@
|
||||
---
|
||||
title: "Lab5: Association rules, Apriori"
|
||||
author: "Vladislav Litvinov <vlad@sek1ro>"
|
||||
output:
|
||||
pdf_document:
|
||||
toc_float: TRUE
|
||||
---
|
||||
# Histogram of item frequencies
|
||||
```{r}
|
||||
library(arules)
library(arulesViz)

# Directory that holds the lab's input data. The path is passed to the reader
# explicitly instead of calling setwd(), so the session's working directory
# is left untouched.
data_dir <- "/home/sek1ro/git/public/lab/ds/25-1/r"

# Load the transactions; items on a line are separated by a single space and
# rm.duplicates = TRUE is passed through to read.transactions().
ts <- read.transactions(
  file.path(data_dir, "AssociationRules.csv"),
  sep = " ",
  rm.duplicates = TRUE
)

# Bar chart of absolute counts for the 10 most frequent items.
itemFrequencyPlot(ts, type = "absolute", topN = 10)

# Item frequencies sorted in decreasing order.
ift <- sort(itemFrequency(ts), decreasing = TRUE)

# The surrounding parentheses print each value in addition to assigning it.
(most_frequent_item <- ift[1])
(max_ts_size <- max(size(ts)))
|
||||
```
|
||||
# Model training: computing the rules
|
||||
```{r}
|
||||
# Mine rules with minimum support 0.01 and a confidence threshold of 0,
# report how many were found, and draw the default rules plot without jitter.
rules <- apriori(
  ts,
  parameter = list(support = 0.01, confidence = 0)
)
length(rules)
plot(rules, jitter = 0)

# Same mining run, but with the confidence threshold raised to 0.5.
rules50 <- apriori(
  ts,
  parameter = list(support = 0.01, confidence = 0.5)
)
length(rules50)
plot(rules50, jitter = 0)
|
||||
```
|
||||
# Applying the threshold manually: confidence = 0.5, plot comparison
|
||||
```{r}
|
||||
library(ggplot2)
|
||||
#' Scatter plot of rule quality: support against confidence, colored by lift.
#'
#' @param q Data frame with `support`, `confidence` and `lift` columns.
#' @param colors Colors handed to `scale_color_gradientn()` for the lift
#'   gradient.
#' @return A ggplot object with the x axis fixed to [0, 0.5] and the y axis
#'   fixed to [0, 1].
asc <- function(q, colors = c("lightgray", "red")) {
  # Rows are put in ascending lift order before plotting.
  by_lift <- q[order(q$lift), ]

  # The color scale spans exactly the observed lift values.
  lift_limits <- c(min(by_lift$lift), max(by_lift$lift))
  lift_scale <- scale_color_gradientn(
    colors = colors,
    name = "Lift",
    limits = lift_limits
  )

  scatter <- ggplot(by_lift, aes(x = support, y = confidence, color = lift))
  scatter +
    geom_point() +
    ylim(0, 1) +
    xlim(0, 0.5) +
    theme_minimal() +
    lift_scale
}
|
||||
|
||||
# Quality measures of the rules that apriori() mined with confidence = 0.5.
quality50 <- as.data.frame(quality(rules50))
asc(quality50, colors = c("navy", "cyan"))

# The same threshold applied by hand to the unfiltered rule set. apriori()
# treats `confidence` as a minimum, so the manual filter has to be inclusive
# (>=) for the two plots to show the same rules. The table is named
# quality_all so that it does not mask the quality() accessor.
quality_all <- as.data.frame(quality(rules))
asc(subset(quality_all, confidence >= 0.5))
|
||||
```
|
||||
# Top 3 rules by lift. 'Relation' graph and matrix
|
||||
```{r}
|
||||
# Support vs. lift with confidence as shading, followed by the default plot.
plot(rules, measure = c("support", "lift"), shading = "confidence")
plot(rules)

# Keep only the rules whose confidence exceeds 0.8.
filt_rules <- rules[which(quality(rules)$confidence > 0.8)]

# Quality table of the filtered rules, ordered by decreasing lift. It is named
# filt_quality so that the data frame does not mask the quality() accessor.
filt_quality <- as.data.frame(quality(filt_rules))
filt_quality <- filt_quality[order(-filt_quality$lift), ]
# NOTE(review): the table is sorted by decreasing lift, so tail() prints the
# 10 lowest-lift rows; if the strongest rules were intended, use head() —
# confirm with the author.
tail(filt_quality, 10)

# Matrix view of the filtered rules, shaded by lift and confidence.
plot(
  filt_rules,
  method = "matrix",
  shading = c("lift", "confidence"),
  engine = "grid"
)

# Graph view of the three filtered rules with the highest lift.
top3_rules <- head(sort(filt_rules, by = "lift", decreasing = TRUE), 3)
plot(top3_rules, method = "graph")
|
||||
```
|
||||
# Random selection of the training and test sets
|
||||
```{r}
|
||||
# Randomly assign 80% of the transactions to the training set and the rest to
# the test set. The sizes are derived from length(ts) instead of hard-coded
# row numbers, and the fixed seed keeps the split reproducible between runs.
set.seed(42)
n_ts <- length(ts)
train_idx <- sample(n_ts, size = floor(0.8 * n_ts))
test_idx <- setdiff(seq_len(n_ts), train_idx)
train_set <- ts[train_idx]
test_set <- ts[test_idx]

# Mine the rules on the training transactions only.
train_rules <- apriori(
  train_set,
  parameter = list(support = 0.01, confidence = 0.5)
)

# Re-evaluate the same rules on the test transactions. reuse = FALSE is
# required here: with the default reuse = TRUE, interestMeasure() takes the
# measures already stored in the rules' quality slot (i.e. the training
# values) instead of counting them on `transactions`.
test_quality <- interestMeasure(
  train_rules,
  measure = c("support", "confidence", "lift", "coverage"),
  transactions = test_set,
  reuse = FALSE
)

# Compare the first (up to) 10 rules; seq_len() avoids NA rows when fewer
# than 10 rules were mined.
train_quality <- quality(train_rules)
shown <- seq_len(min(10L, length(train_rules)))
comparison <- data.frame(
  train_support = train_quality$support[shown],
  test_support = test_quality$support[shown],
  train_lift = train_quality$lift[shown],
  test_lift = test_quality$lift[shown]
)

print(comparison)

# Lift on train vs. test; the dashed line marks equal values (y = x).
plot(
  comparison$train_lift, comparison$test_lift,
  xlab = "train lift",
  ylab = "test lift",
  pch = 19
)
abline(0, 1, lty = 2)

# Plot for support
plot(
  comparison$train_support, comparison$test_support,
  xlab = "train support",
  ylab = "test support",
  pch = 19
)
abline(0, 1, lty = 2)
|
||||
```
|
||||
Reference in New Issue
Block a user