Using Apriori to study grocery store customer purchasing behavior
By Eric Stromgren | ericstromgren.com | Connect with me on LinkedIn
October 20, 2020 at 9:57:51 AM MST
## Markdown Setup
#Time it
start.time <- Sys.time()
#Markdown Options
knitr::opts_chunk$set(echo = TRUE,
warning=FALSE,
message=FALSE,
widgetframe_self_contained = FALSE,
widgetframe_isolate_widgets = TRUE)
#Read in custom functions
source("D:/data_projects/functions/eda_libraries.R")
##
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
##
## Attaching package: 'Hmisc'
## The following object is masked from 'package:e1071':
##
## impute
## The following objects are masked from 'package:dplyr':
##
## src, summarize
## The following objects are masked from 'package:base':
##
## format.pval, units
source("D:/data_projects/functions/audit_numeric_summary.R")
source("D:/data_projects/functions/audit_numeric_viz.R")
source("D:/data_projects/functions/audit_factor_summary.R")
#Set seed for reproducibility
set.seed(218)
#Read in Data and Clean Column Names with the Janitor library
mb_data <- read.csv("D:/data_projects/market_basket_analysis/Market_Basket_Optimisation.csv", header=FALSE)
#Inspect Raw Data
datatable(head(mb_data, 5),
rownames=FALSE,
class = 'cell-border stripe compact hover',
caption = "Inspect Raw Data: First 5 Rows")
#Remove white spaces, consistency of leading, trailing and spaces between bigrams tough to discern.
mb_data <- as.data.frame(apply(mb_data,2,function(x)gsub('\\s+', '',x)))
#Add ID column
mb_data <- tibble::rowid_to_column(mb_data, "transaction_id")
#Rename columns
colnames(mb_data) <- c("transaction_id",
"item_1",
"item_2",
"item_3",
"item_4",
"item_5",
"item_6",
"item_7",
"item_8",
"item_9",
"item_10",
"item_11",
"item_12",
"item_13",
"item_14",
"item_15",
"item_16",
"item_17",
"item_18",
"item_19",
"item_20"
)
#Inspect Clean Data
datatable(head(mb_data, 5),
rownames=FALSE,
class = 'cell-border stripe compact hover',
caption = "Inspect Clean Data: First 5 Rows")
### One Column Dataframe containing all items purchased at this store.
#subset to only columns with items
mb_data_no_id <- subset(mb_data, select=-c(transaction_id))
#Put all items into one column
mb_data_items <- data.frame(item=unlist(mb_data_no_id, use.names = FALSE))
#Set and remove NA values
mb_data_items[mb_data_items==""]<-NA
mb_data_items <- data.frame(na.omit(mb_data_items))
audit_factor_summary(mb_data_items, presentation = TRUE)
## [1] "item"
## item
count
pct
mineralwater
1788
0.0609
eggs
1348
0.0459
spaghetti
1306
0.0445
frenchfries
1282
0.0437
chocolate
1230
0.0419
greentea
991
0.0337
milk
972
0.0331
groundbeef
737
0.0251
frozenvegetables
715
0.0244
pancakes
713
0.0243
burgers
654
0.0223
cake
608
0.0207
cookies
603
0.0205
escalope
595
0.0203
lowfatyogurt
574
0.0195
shrimp
536
0.0183
tomatoes
513
0.0175
oliveoil
494
0.0168
frozensmoothie
475
0.0162
turkey
469
0.0160
chicken
450
0.0153
wholewheatrice
439
0.0150
gratedcheese
393
0.0134
cookingoil
383
0.0130
soup
379
0.0129
herb&pepper
371
0.0126
honey
356
0.0121
champagne
351
0.0120
freshbread
323
0.0110
salmon
319
0.0109
brownies
253
0.0086
avocado
250
0.0085
hotdogs
243
0.0083
cottagecheese
239
0.0081
tomatojuice
228
0.0078
butter
226
0.0077
wholewheatpasta
221
0.0075
redwine
211
0.0072
yogurtcake
205
0.0070
lightmayo
204
0.0069
energybar
203
0.0069
ham
203
0.0069
energydrink
200
0.0068
pepper
199
0.0068
cereals
193
0.0066
vegetablesmix
193
0.0066
muffins
181
0.0062
oil
173
0.0059
frenchwine
169
0.0058
freshtuna
167
0.0057
strawberries
160
0.0054
meatballs
157
0.0053
almonds
153
0.0052
parmesancheese
149
0.0051
mushroomcreamsauce
143
0.0049
rice
141
0.0048
proteinbar
139
0.0047
mint
131
0.0045
whitewine
124
0.0042
pasta
118
0.0040
lightcream
117
0.0040
carrots
115
0.0039
blacktea
107
0.0036
tomatosauce
106
0.0036
fromageblanc
102
0.0035
gums
101
0.0034
eggplant
99
0.0034
extradarkchocolate
90
0.0031
melons
90
0.0031
bodyspray
86
0.0029
yams
86
0.0029
magazines
82
0.0028
barbecuesauce
81
0.0028
cider
79
0.0027
nonfatmilk
78
0.0027
candybars
73
0.0025
zucchini
71
0.0024
wholeweatflour
70
0.0024
blueberries
69
0.0023
salt
69
0.0023
flaxseed
68
0.0023
greengrapes
68
0.0023
antioxydantjuice
67
0.0023
bacon
65
0.0022
bugspray
65
0.0022
greenbeans
65
0.0022
clothesaccessories
63
0.0021
toothpaste
61
0.0021
shallot
58
0.0020
strongcheese
58
0.0020
spinach
53
0.0018
glutenfreebar
52
0.0018
petfood
49
0.0017
soda
47
0.0016
sparklingwater
47
0.0016
chili
46
0.0016
mayonnaise
46
0.0016
pickles
45
0.0015
burgersauce
44
0.0015
mintgreentea
42
0.0014
handproteinbar
39
0.0013
salad
37
0.0013
shampoo
37
0.0013
asparagus
36
0.0012
cauliflower
36
0.0012
corn
36
0.0012
babiesfood
34
0.0012
sandwich
34
0.0012
dessertwine
33
0.0011
ketchup
33
0.0011
oatmeal
33
0.0011
chocolatebread
32
0.0011
chutney
31
0.0011
mashedpotato
31
0.0011
tea
29
0.0010
bramble
14
0.0005
cream
7
0.0002
napkins
5
0.0002
waterspray
3
0.0001
## [1] "Done Processing"
## [1] "1 variables processed"
## [1] "Function Efficiency Statistics"
## user system elapsed
## 1.00 0.08 1.09
#Define NAs in the dataset
mb_data[mb_data==""]<-NA
#non-NAs by item number sold
df_1 <- mb_data %>%
summarise_all(funs(sum(!is.na(.))))
df_1 <- data.frame(non_na_values = t(df_1))
#NAs
df_2 <- mb_data %>%
summarise_all(funs(sum(is.na(.))))
df_2 <- data.frame(na_values = t(df_2))
#Merge Frames
df_3 <- data.frame(merge(df_1, df_2, by = 0, all=TRUE))
df_3 <- data.frame(names = row.names(df_3), df_3)
df_3 <- df_3[order(-df_3$non_na_values),]
datatable(df_3,
rownames=FALSE,
class = 'cell-border stripe compact hover',
caption = "NA Summary")
audit_factor_summary(mb_data, presentation=TRUE)
## [1] "item_1"
## item_1
count
pct
mineralwater
577
0.0769
burgers
576
0.0768
turkey
458
0.0611
chocolate
391
0.0521
frozenvegetables
373
0.0497
spaghetti
354
0.0472
shrimp
325
0.0433
gratedcheese
293
0.0391
eggs
279
0.0372
cookies
270
0.0360
frenchfries
244
0.0325
herb&pepper
232
0.0309
groundbeef
218
0.0291
tomatoes
212
0.0283
milk
181
0.0241
escalope
143
0.0191
freshtuna
129
0.0172
redwine
123
0.0164
ham
120
0.0160
cake
98
0.0131
greentea
97
0.0129
wholewheatpasta
95
0.0127
pancakes
80
0.0107
soup
78
0.0104
muffins
69
0.0092
energybar
67
0.0089
oliveoil
67
0.0089
champagne
64
0.0085
pepper
61
0.0081
avocado
57
0.0076
butter
52
0.0069
parmesancheese
51
0.0068
wholewheatrice
47
0.0063
lowfatyogurt
46
0.0061
chicken
44
0.0059
vegetablesmix
39
0.0052
pickles
38
0.0051
meatballs
33
0.0044
frozensmoothie
32
0.0043
yogurtcake
31
0.0041
salmon
29
0.0039
dessertwine
28
0.0037
hotdogs
28
0.0037
honey
27
0.0036
candybars
25
0.0033
cereals
25
0.0033
oil
24
0.0032
strawberries
24
0.0032
tomatosauce
24
0.0032
yams
24
0.0032
brownies
23
0.0031
cookingoil
21
0.0028
antioxydantjuice
18
0.0024
energydrink
18
0.0024
flaxseed
18
0.0024
frenchwine
18
0.0024
cottagecheese
17
0.0023
freshbread
14
0.0019
fromageblanc
14
0.0019
nonfatmilk
14
0.0019
proteinbar
14
0.0019
gums
12
0.0016
pasta
12
0.0016
tomatojuice
12
0.0016
almonds
11
0.0015
cider
11
0.0015
lightmayo
11
0.0015
soda
11
0.0015
mint
10
0.0013
rice
10
0.0013
blacktea
9
0.0012
clothesaccessories
9
0.0012
extradarkchocolate
8
0.0011
greenbeans
8
0.0011
melons
8
0.0011
sandwich
8
0.0011
spinach
8
0.0011
wholeweatflour
8
0.0011
chutney
7
0.0009
greengrapes
7
0.0009
mushroomcreamsauce
7
0.0009
salt
7
0.0009
strongcheese
7
0.0009
whitewine
7
0.0009
bacon
6
0.0008
bugspray
6
0.0008
mintgreentea
6
0.0008
petfood
6
0.0008
babiesfood
5
0.0007
chili
5
0.0007
glutenfreebar
5
0.0007
lightcream
5
0.0007
magazines
5
0.0007
blueberries
4
0.0005
mayonnaise
4
0.0005
shallot
4
0.0005
sparklingwater
4
0.0005
toothpaste
4
0.0005
asparagus
3
0.0004
barbecuesauce
3
0.0004
bramble
3
0.0004
carrots
3
0.0004
chocolatebread
3
0.0004
salad
3
0.0004
burgersauce
2
0.0003
eggplant
2
0.0003
handproteinbar
2
0.0003
mashedpotato
2
0.0003
bodyspray
1
0.0001
cauliflower
1
0.0001
corn
1
0.0001
cream
1
0.0001
ketchup
1
0.0001
oatmeal
1
0.0001
shampoo
1
0.0001
## [1] "item_2"
## item_2
count
pct
mineralwater
484
0.0842
spaghetti
411
0.0715
eggs
302
0.0525
groundbeef
291
0.0506
frenchfries
243
0.0423
frozenvegetables
234
0.0407
chocolate
223
0.0388
milk
209
0.0364
tomatoes
176
0.0306
shrimp
153
0.0266
herb&pepper
115
0.0200
cookies
101
0.0176
cake
99
0.0172
escalope
96
0.0167
wholewheatrice
92
0.0160
pancakes
91
0.0158
greentea
84
0.0146
oliveoil
82
0.0143
burgers
78
0.0136
champagne
78
0.0136
gratedcheese
78
0.0136
soup
76
0.0132
chicken
73
0.0127
redwine
72
0.0125
honey
69
0.0120
wholewheatpasta
68
0.0118
avocado
64
0.0111
frozensmoothie
63
0.0110
pepper
60
0.0104
lowfatyogurt
58
0.0101
ham
57
0.0099
salmon
54
0.0094
butter
53
0.0092
cookingoil
50
0.0087
energydrink
48
0.0084
vegetablesmix
45
0.0078
energybar
44
0.0077
parmesancheese
44
0.0077
pasta
40
0.0070
yogurtcake
38
0.0066
meatballs
37
0.0064
freshtuna
34
0.0059
cottagecheese
32
0.0056
muffins
32
0.0056
blacktea
31
0.0054
frenchwine
30
0.0052
lightmayo
30
0.0052
almonds
29
0.0050
tomatosauce
29
0.0050
hotdogs
28
0.0049
freshbread
25
0.0044
oil
25
0.0044
yams
25
0.0044
brownies
22
0.0038
tomatojuice
22
0.0038
mint
21
0.0037
mushroomcreamsauce
21
0.0037
strawberries
19
0.0033
cereals
18
0.0031
fromageblanc
18
0.0031
cider
17
0.0030
eggplant
17
0.0030
rice
17
0.0030
flaxseed
15
0.0026
melons
15
0.0026
bodyspray
13
0.0023
carrots
13
0.0023
greengrapes
13
0.0023
gums
13
0.0023
whitewine
13
0.0023
nonfatmilk
12
0.0021
candybars
11
0.0019
turkey
11
0.0019
antioxydantjuice
10
0.0017
proteinbar
10
0.0017
shallot
10
0.0017
spinach
10
0.0017
zucchini
10
0.0017
barbecuesauce
9
0.0016
bugspray
9
0.0016
soda
9
0.0016
bacon
8
0.0014
blueberries
8
0.0014
extradarkchocolate
8
0.0014
lightcream
8
0.0014
mintgreentea
8
0.0014
salad
8
0.0014
clothesaccessories
7
0.0012
glutenfreebar
6
0.0010
petfood
6
0.0010
pickles
6
0.0010
salt
6
0.0010
babiesfood
5
0.0009
burgersauce
5
0.0009
chutney
5
0.0009
corn
5
0.0009
greenbeans
5
0.0009
handproteinbar
5
0.0009
ketchup
5
0.0009
mayonnaise
5
0.0009
tea
5
0.0009
wholeweatflour
5
0.0009
oatmeal
4
0.0007
sparklingwater
4
0.0007
strongcheese
4
0.0007
chocolatebread
3
0.0005
dessertwine
3
0.0005
mashedpotato
3
0.0005
sandwich
3
0.0005
toothpaste
3
0.0005
asparagus
2
0.0003
magazines
2
0.0003
shampoo
2
0.0003
cauliflower
1
0.0002
chili
1
0.0002
cream
1
0.0002
waterspray
1
0.0002
## [1] "item_3"
## item_3
count
pct
mineralwater
375
0.0854
spaghetti
279
0.0636
eggs
225
0.0513
milk
213
0.0485
frenchfries
180
0.0410
chocolate
174
0.0396
groundbeef
141
0.0321
greentea
125
0.0285
cake
111
0.0253
pancakes
104
0.0237
oliveoil
88
0.0201
escalope
87
0.0198
frozenvegetables
87
0.0198
lowfatyogurt
87
0.0198
tomatoes
85
0.0194
soup
77
0.0175
chicken
74
0.0169
wholewheatrice
69
0.0157
honey
62
0.0141
salmon
60
0.0137
champagne
58
0.0132
frozensmoothie
58
0.0132
cookingoil
51
0.0116
pepper
51
0.0116
cookies
50
0.0114
shrimp
48
0.0109
avocado
46
0.0105
butter
45
0.0103
vegetablesmix
38
0.0087
pasta
37
0.0084
energybar
36
0.0082
almonds
35
0.0080
brownies
35
0.0080
cottagecheese
34
0.0077
energydrink
34
0.0077
parmesancheese
34
0.0077
freshbread
33
0.0075
wholewheatpasta
33
0.0075
frenchwine
32
0.0073
yogurtcake
32
0.0073
meatballs
31
0.0071
mushroomcreamsauce
31
0.0071
tomatosauce
31
0.0071
hotdogs
30
0.0068
gums
26
0.0059
lightmayo
25
0.0057
yams
24
0.0055
muffins
23
0.0052
oil
23
0.0052
gratedcheese
21
0.0048
strawberries
21
0.0048
tomatojuice
21
0.0048
cereals
20
0.0046
cider
19
0.0043
herb&pepper
19
0.0043
barbecuesauce
18
0.0041
eggplant
18
0.0041
whitewine
18
0.0041
lightcream
17
0.0039
rice
17
0.0039
blacktea
15
0.0034
fromageblanc
15
0.0034
mint
15
0.0034
shallot
15
0.0034
bodyspray
14
0.0032
carrots
14
0.0032
melons
14
0.0032
blueberries
13
0.0030
greengrapes
13
0.0030
antioxydantjuice
12
0.0027
bacon
12
0.0027
flaxseed
12
0.0027
strongcheese
12
0.0027
bugspray
11
0.0025
extradarkchocolate
11
0.0025
proteinbar
11
0.0025
redwine
11
0.0025
ham
10
0.0023
magazines
10
0.0023
spinach
10
0.0023
candybars
9
0.0021
clothesaccessories
9
0.0021
nonfatmilk
9
0.0021
toothpaste
9
0.0021
salt
8
0.0018
soda
8
0.0018
wholeweatflour
8
0.0018
mashedpotato
7
0.0016
chili
6
0.0014
chocolatebread
6
0.0014
greenbeans
6
0.0014
salad
6
0.0014
asparagus
5
0.0011
corn
5
0.0011
glutenfreebar
5
0.0011
oatmeal
5
0.0011
sparklingwater
5
0.0011
babiesfood
4
0.0009
burgersauce
4
0.0009
freshtuna
4
0.0009
mintgreentea
4
0.0009
petfood
4
0.0009
shampoo
4
0.0009
tea
4
0.0009
mayonnaise
3
0.0007
sandwich
3
0.0007
chutney
2
0.0005
dessertwine
2
0.0005
ketchup
2
0.0005
zucchini
2
0.0005
cauliflower
1
0.0002
cream
1
0.0002
handproteinbar
1
0.0002
pickles
1
0.0002
waterspray
1
0.0002
## [1] "item_4"
## item_4
count
pct
mineralwater
201
0.0601
eggs
181
0.0541
frenchfries
174
0.0520
spaghetti
167
0.0499
milk
149
0.0445
chocolate
143
0.0428
greentea
132
0.0395
pancakes
111
0.0332
cake
92
0.0275
oliveoil
84
0.0251
soup
72
0.0215
chicken
70
0.0209
lowfatyogurt
68
0.0203
wholewheatrice
68
0.0203
escalope
63
0.0188
frozensmoothie
62
0.0185
cookingoil
58
0.0173
groundbeef
57
0.0170
cookies
55
0.0164
honey
53
0.0158
salmon
47
0.0141
avocado
43
0.0129
champagne
41
0.0123
freshbread
41
0.0123
tomatoes
37
0.0111
butter
36
0.0108
strawberries
36
0.0108
lightmayo
30
0.0090
almonds
29
0.0087
cottagecheese
29
0.0087
frenchwine
28
0.0084
mushroomcreamsauce
27
0.0081
brownies
26
0.0078
cereals
26
0.0078
muffins
26
0.0078
yogurtcake
26
0.0078
hotdogs
25
0.0075
meatballs
24
0.0072
rice
24
0.0072
vegetablesmix
24
0.0072
energybar
23
0.0069
fromageblanc
21
0.0063
pepper
21
0.0063
proteinbar
21
0.0063
tomatojuice
20
0.0060
energydrink
18
0.0054
frozenvegetables
18
0.0054
lightcream
18
0.0054
oil
17
0.0051
carrots
15
0.0045
eggplant
15
0.0045
extradarkchocolate
14
0.0042
flaxseed
14
0.0042
bugspray
13
0.0039
cider
13
0.0039
greengrapes
13
0.0039
mint
13
0.0039
parmesancheese
13
0.0039
wholeweatflour
13
0.0039
wholewheatpasta
13
0.0039
strongcheese
12
0.0036
whitewine
12
0.0036
bacon
11
0.0033
clothesaccessories
11
0.0033
barbecuesauce
10
0.0030
glutenfreebar
10
0.0030
greenbeans
10
0.0030
magazines
10
0.0030
tomatosauce
10
0.0030
blacktea
9
0.0027
blueberries
9
0.0027
bodyspray
9
0.0027
gums
9
0.0027
nonfatmilk
9
0.0027
petfood
9
0.0027
shrimp
9
0.0027
zucchini
9
0.0027
burgersauce
8
0.0024
corn
8
0.0024
melons
8
0.0024
pasta
8
0.0024
shallot
8
0.0024
sparklingwater
8
0.0024
spinach
8
0.0024
chocolatebread
7
0.0021
yams
7
0.0021
antioxydantjuice
6
0.0018
ketchup
6
0.0018
oatmeal
6
0.0018
chili
5
0.0015
herb&pepper
5
0.0015
soda
5
0.0015
toothpaste
5
0.0015
asparagus
4
0.0012
candybars
4
0.0012
chutney
4
0.0012
handproteinbar
4
0.0012
mashedpotato
4
0.0012
mayonnaise
4
0.0012
mintgreentea
4
0.0012
salt
4
0.0012
sandwich
4
0.0012
shampoo
4
0.0012
babiesfood
3
0.0009
cauliflower
3
0.0009
redwine
3
0.0009
bramble
2
0.0006
cream
1
0.0003
gratedcheese
1
0.0003
ham
1
0.0003
napkins
1
0.0003
salad
1
0.0003
tea
1
0.0003
waterspray
1
0.0003
## [1] "item_5"
## item_5
count
pct
greentea
153
0.0605
eggs
134
0.0530
frenchfries
130
0.0514
chocolate
115
0.0455
milk
114
0.0451
pancakes
90
0.0356
mineralwater
84
0.0332
oliveoil
64
0.0253
cookingoil
63
0.0249
wholewheatrice
63
0.0249
lowfatyogurt
62
0.0245
spaghetti
62
0.0245
cake
60
0.0237
escalope
59
0.0233
chicken
53
0.0210
salmon
53
0.0210
frozensmoothie
49
0.0194
cookies
45
0.0178
soup
45
0.0178
honey
42
0.0166
hotdogs
35
0.0138
freshbread
34
0.0134
tomatojuice
32
0.0127
brownies
28
0.0111
cottagecheese
28
0.0111
cereals
26
0.0103
champagne
26
0.0103
frenchwine
24
0.0095
rice
24
0.0095
lightmayo
23
0.0091
oil
23
0.0091
butter
21
0.0083
carrots
21
0.0083
groundbeef
21
0.0083
lightcream
20
0.0079
energybar
19
0.0075
mushroomcreamsauce
19
0.0075
avocado
18
0.0071
fromageblanc
18
0.0071
meatballs
17
0.0067
vegetablesmix
17
0.0067
yogurtcake
17
0.0067
zucchini
17
0.0067
blacktea
16
0.0063
whitewine
16
0.0063
bodyspray
15
0.0059
muffins
15
0.0059
proteinbar
15
0.0059
strawberries
14
0.0055
magazines
13
0.0051
almonds
12
0.0047
energydrink
12
0.0047
gums
12
0.0047
mint
12
0.0047
nonfatmilk
12
0.0047
pasta
12
0.0047
wholewheatpasta
12
0.0047
barbecuesauce
11
0.0043
greengrapes
10
0.0040
tomatosauce
10
0.0040
blueberries
9
0.0036
eggplant
9
0.0036
extradarkchocolate
9
0.0036
bacon
8
0.0032
candybars
8
0.0032
clothesaccessories
8
0.0032
melons
8
0.0032
salt
8
0.0032
strongcheese
8
0.0032
cider
7
0.0028
greenbeans
7
0.0028
mintgreentea
7
0.0028
shallot
7
0.0028
spinach
7
0.0028
wholeweatflour
7
0.0028
chili
6
0.0024
parmesancheese
6
0.0024
petfood
6
0.0024
sparklingwater
6
0.0024
toothpaste
6
0.0024
bugspray
5
0.0020
flaxseed
5
0.0020
ham
5
0.0020
handproteinbar
5
0.0020
sandwich
5
0.0020
asparagus
4
0.0016
burgersauce
4
0.0016
cauliflower
4
0.0016
chocolatebread
4
0.0016
chutney
4
0.0016
corn
4
0.0016
glutenfreebar
4
0.0016
mayonnaise
4
0.0016
tea
4
0.0016
antioxydantjuice
3
0.0012
ketchup
3
0.0012
pepper
3
0.0012
yams
3
0.0012
babiesfood
2
0.0008
bramble
2
0.0008
frozenvegetables
2
0.0008
mashedpotato
2
0.0008
napkins
2
0.0008
oatmeal
2
0.0008
redwine
2
0.0008
salad
2
0.0008
shampoo
2
0.0008
soda
2
0.0008
cream
1
0.0004
tomatoes
1
0.0004
## [1] "item_6"
## item_6
count
pct
frenchfries
107
0.0574
eggs
102
0.0547
greentea
100
0.0536
chocolate
71
0.0381
pancakes
69
0.0370
oliveoil
63
0.0338
cake
60
0.0322
lowfatyogurt
59
0.0317
frozensmoothie
58
0.0311
chicken
51
0.0274
milk
51
0.0274
cookingoil
48
0.0258
mineralwater
44
0.0236
escalope
42
0.0225
freshbread
39
0.0209
salmon
36
0.0193
honey
35
0.0188
wholewheatrice
34
0.0182
brownies
30
0.0161
cottagecheese
30
0.0161
champagne
28
0.0150
cookies
28
0.0150
hotdogs
26
0.0139
cereals
23
0.0123
spaghetti
23
0.0123
oil
22
0.0118
yogurtcake
22
0.0118
lightmayo
20
0.0107
tomatojuice
19
0.0102
energydrink
18
0.0097
soup
18
0.0097
proteinbar
17
0.0091
whitewine
16
0.0086
extradarkchocolate
15
0.0080
strawberries
15
0.0080
blacktea
14
0.0075
carrots
14
0.0075
rice
14
0.0075
almonds
13
0.0070
eggplant
13
0.0070
frenchwine
13
0.0070
lightcream
13
0.0070
mint
13
0.0070
nonfatmilk
12
0.0064
salt
11
0.0059
barbecuesauce
10
0.0054
butter
10
0.0054
vegetablesmix
10
0.0054
avocado
9
0.0048
bodyspray
9
0.0048
cauliflower
9
0.0048
meatballs
9
0.0048
melons
9
0.0048
blueberries
8
0.0043
burgersauce
8
0.0043
fromageblanc
8
0.0043
magazines
8
0.0043
toothpaste
8
0.0043
bugspray
7
0.0038
groundbeef
7
0.0038
mushroomcreamsauce
7
0.0038
wholeweatflour
7
0.0038
antioxydantjuice
6
0.0032
babiesfood
6
0.0032
bacon
6
0.0032
energybar
6
0.0032
greenbeans
6
0.0032
mayonnaise
6
0.0032
salad
6
0.0032
soda
6
0.0032
strongcheese
6
0.0032
zucchini
6
0.0032
chili
5
0.0027
greengrapes
5
0.0027
handproteinbar
5
0.0027
ketchup
5
0.0027
oatmeal
5
0.0027
glutenfreebar
4
0.0021
mintgreentea
4
0.0021
muffins
4
0.0021
petfood
4
0.0021
shallot
4
0.0021
asparagus
3
0.0016
chocolatebread
3
0.0016
cider
3
0.0016
clothesaccessories
3
0.0016
corn
3
0.0016
ham
3
0.0016
pasta
3
0.0016
tea
3
0.0016
chutney
2
0.0011
cream
2
0.0011
gums
2
0.0011
pepper
2
0.0011
sparklingwater
2
0.0011
tomatoes
2
0.0011
tomatosauce
2
0.0011
bramble
1
0.0005
candybars
1
0.0005
flaxseed
1
0.0005
frozenvegetables
1
0.0005
mashedpotato
1
0.0005
sandwich
1
0.0005
shrimp
1
0.0005
spinach
1
0.0005
yams
1
0.0005
## [1] "item_7"
## item_7
count
pct
greentea
96
0.0701
frenchfries
81
0.0592
pancakes
69
0.0504
eggs
59
0.0431
lowfatyogurt
55
0.0402
frozensmoothie
41
0.0299
freshbread
38
0.0278
cookingoil
37
0.0270
chocolate
36
0.0263
chicken
35
0.0256
escalope
31
0.0226
cake
30
0.0219
brownies
28
0.0205
oliveoil
27
0.0197
wholewheatrice
27
0.0197
champagne
26
0.0190
milk
26
0.0190
lightmayo
24
0.0175
tomatojuice
24
0.0175
honey
23
0.0168
hotdogs
22
0.0161
cottagecheese
19
0.0139
salmon
19
0.0139
whitewine
19
0.0139
cookies
17
0.0124
lightcream
17
0.0124
cereals
16
0.0117
frenchwine
16
0.0117
mineralwater
14
0.0102
oil
14
0.0102
rice
13
0.0095
proteinbar
12
0.0088
energydrink
11
0.0080
soup
11
0.0080
strawberries
11
0.0080
almonds
10
0.0073
bodyspray
10
0.0073
greenbeans
10
0.0073
gums
10
0.0073
carrots
9
0.0066
eggplant
9
0.0066
yogurtcake
9
0.0066
barbecuesauce
8
0.0058
extradarkchocolate
8
0.0058
mint
8
0.0058
mushroomcreamsauce
8
0.0058
shampoo
8
0.0058
spaghetti
8
0.0058
wholeweatflour
8
0.0058
avocado
7
0.0051
blacktea
7
0.0051
blueberries
7
0.0051
melons
7
0.0051
vegetablesmix
7
0.0051
zucchini
7
0.0051
butter
6
0.0044
chili
6
0.0044
clothesaccessories
6
0.0044
handproteinbar
6
0.0044
sparklingwater
6
0.0044
bacon
5
0.0037
bugspray
5
0.0037
cider
5
0.0037
corn
5
0.0037
ham
5
0.0037
meatballs
5
0.0037
nonfatmilk
5
0.0037
salt
5
0.0037
candybars
4
0.0029
cauliflower
4
0.0029
energybar
4
0.0029
magazines
4
0.0029
mashedpotato
4
0.0029
shallot
4
0.0029
strongcheese
4
0.0029
tea
4
0.0029
toothpaste
4
0.0029
burgersauce
3
0.0022
chutney
3
0.0022
fromageblanc
3
0.0022
greengrapes
3
0.0022
ketchup
3
0.0022
petfood
3
0.0022
sandwich
3
0.0022
soda
3
0.0022
antioxydantjuice
2
0.0015
asparagus
2
0.0015
babiesfood
2
0.0015
bramble
2
0.0015
flaxseed
2
0.0015
glutenfreebar
2
0.0015
groundbeef
2
0.0015
mayonnaise
2
0.0015
muffins
2
0.0015
oatmeal
2
0.0015
salad
2
0.0015
spinach
2
0.0015
yams
2
0.0015
chocolatebread
1
0.0007
mintgreentea
1
0.0007
parmesancheese
1
0.0007
pepper
1
0.0007
## [1] "item_8"
## item_8
count
pct
greentea
67
0.0683
pancakes
44
0.0449
lowfatyogurt
43
0.0438
frenchfries
40
0.0408
chocolate
38
0.0387
frozensmoothie
38
0.0387
freshbread
35
0.0357
eggs
28
0.0285
escalope
28
0.0285
cake
27
0.0275
chicken
25
0.0255
honey
24
0.0245
tomatojuice
24
0.0245
cookingoil
23
0.0234
cottagecheese
22
0.0224
brownies
21
0.0214
milk
20
0.0204
cookies
15
0.0153
hotdogs
14
0.0143
lightmayo
13
0.0133
wholewheatrice
13
0.0133
cereals
12
0.0122
champagne
12
0.0122
lightcream
12
0.0122
proteinbar
12
0.0122
yogurtcake
12
0.0122
carrots
11
0.0112
energydrink
10
0.0102
greenbeans
10
0.0102
salt
10
0.0102
magazines
9
0.0092
asparagus
8
0.0082
gums
8
0.0082
mint
8
0.0082
mushroomcreamsauce
8
0.0082
oil
8
0.0082
oliveoil
8
0.0082
salmon
8
0.0082
strawberries
8
0.0082
toothpaste
8
0.0082
whitewine
8
0.0082
almonds
7
0.0071
antioxydantjuice
7
0.0071
blueberries
7
0.0071
melons
7
0.0071
rice
7
0.0071
shampoo
7
0.0071
chili
6
0.0061
glutenfreebar
6
0.0061
vegetablesmix
6
0.0061
avocado
5
0.0051
candybars
5
0.0051
eggplant
5
0.0051
mineralwater
5
0.0051
petfood
5
0.0051
zucchini
5
0.0051
bacon
4
0.0041
barbecuesauce
4
0.0041
mayonnaise
4
0.0041
salad
4
0.0041
spinach
4
0.0041
wholeweatflour
4
0.0041
bodyspray
3
0.0031
bugspray
3
0.0031
burgersauce
3
0.0031
butter
3
0.0031
chutney
3
0.0031
extradarkchocolate
3
0.0031
frenchwine
3
0.0031
handproteinbar
3
0.0031
ketchup
3
0.0031
mintgreentea
3
0.0031
pasta
3
0.0031
sandwich
3
0.0031
shallot
3
0.0031
tea
3
0.0031
blacktea
2
0.0020
bramble
2
0.0020
chocolatebread
2
0.0020
cider
2
0.0020
clothesaccessories
2
0.0020
fromageblanc
2
0.0020
greengrapes
2
0.0020
mashedpotato
2
0.0020
muffins
2
0.0020
oatmeal
2
0.0020
spaghetti
2
0.0020
sparklingwater
2
0.0020
babiesfood
1
0.0010
cauliflower
1
0.0010
corn
1
0.0010
energybar
1
0.0010
flaxseed
1
0.0010
meatballs
1
0.0010
napkins
1
0.0010
soda
1
0.0010
soup
1
0.0010
## [1] "item_9"
## item_9
count
pct
greentea
57
0.0872
lowfatyogurt
38
0.0581
frozensmoothie
35
0.0535
frenchfries
34
0.0520
freshbread
28
0.0428
pancakes
21
0.0321
eggs
19
0.0291
chocolate
18
0.0275
escalope
18
0.0275
tomatojuice
18
0.0275
brownies
15
0.0229
cookingoil
15
0.0229
chicken
13
0.0199
energydrink
13
0.0199
hotdogs
13
0.0199
cake
11
0.0168
mint
11
0.0168
proteinbar
11
0.0168
honey
10
0.0153
lightmayo
10
0.0153
rice
10
0.0153
zucchini
10
0.0153
cottagecheese
9
0.0138
carrots
8
0.0122
cookies
8
0.0122
oliveoil
8
0.0122
wholewheatrice
8
0.0122
almonds
7
0.0107
magazines
7
0.0107
milk
7
0.0107
oil
7
0.0107
cereals
6
0.0092
extradarkchocolate
6
0.0092
lightcream
6
0.0092
mushroomcreamsauce
6
0.0092
salmon
6
0.0092
bacon
5
0.0076
bodyspray
5
0.0076
cauliflower
5
0.0076
petfood
5
0.0076
wholeweatflour
5
0.0076
barbecuesauce
4
0.0061
burgersauce
4
0.0061
champagne
4
0.0061
mayonnaise
4
0.0061
melons
4
0.0061
mintgreentea
4
0.0061
muffins
4
0.0061
whitewine
4
0.0061
chili
3
0.0046
eggplant
3
0.0046
frenchwine
3
0.0046
glutenfreebar
3
0.0046
greenbeans
3
0.0046
mashedpotato
3
0.0046
oatmeal
3
0.0046
sandwich
3
0.0046
shampoo
3
0.0046
strongcheese
3
0.0046
yogurtcake
3
0.0046
asparagus
2
0.0031
babiesfood
2
0.0031
blueberries
2
0.0031
fromageblanc
2
0.0031
gums
2
0.0031
mineralwater
2
0.0031
sparklingwater
2
0.0031
tea
2
0.0031
toothpaste
2
0.0031
antioxydantjuice
1
0.0015
avocado
1
0.0015
blacktea
1
0.0015
bramble
1
0.0015
bugspray
1
0.0015
candybars
1
0.0015
chocolatebread
1
0.0015
chutney
1
0.0015
cider
1
0.0015
clothesaccessories
1
0.0015
corn
1
0.0015
energybar
1
0.0015
handproteinbar
1
0.0015
pasta
1
0.0015
salad
1
0.0015
soda
1
0.0015
soup
1
0.0015
strawberries
1
0.0015
vegetablesmix
1
0.0015
## [1] "item_10"
## item_10
count
pct
greentea
31
0.0785
frenchfries
19
0.0481
lowfatyogurt
17
0.0430
tomatojuice
17
0.0430
pancakes
14
0.0354
brownies
12
0.0304
cereals
12
0.0304
escalope
12
0.0304
frozensmoothie
12
0.0304
chocolate
11
0.0278
cottagecheese
11
0.0278
eggs
10
0.0253
hotdogs
10
0.0253
cake
9
0.0228
wholewheatrice
9
0.0228
cookingoil
8
0.0203
energydrink
7
0.0177
yogurtcake
7
0.0177
champagne
6
0.0152
mint
6
0.0152
bodyspray
5
0.0127
carrots
5
0.0127
cookies
5
0.0127
freshbread
5
0.0127
mayonnaise
5
0.0127
oil
5
0.0127
strawberries
5
0.0127
vegetablesmix
5
0.0127
whitewine
5
0.0127
bugspray
4
0.0101
cauliflower
4
0.0101
chicken
4
0.0101
clothesaccessories
4
0.0101
extradarkchocolate
4
0.0101
glutenfreebar
4
0.0101
handproteinbar
4
0.0101
honey
4
0.0101
shampoo
4
0.0101
gums
3
0.0076
lightmayo
3
0.0076
mushroomcreamsauce
3
0.0076
oatmeal
3
0.0076
proteinbar
3
0.0076
rice
3
0.0076
shallot
3
0.0076
sparklingwater
3
0.0076
toothpaste
3
0.0076
barbecuesauce
2
0.0051
blacktea
2
0.0051
eggplant
2
0.0051
energybar
2
0.0051
frenchwine
2
0.0051
magazines
2
0.0051
melons
2
0.0051
milk
2
0.0051
muffins
2
0.0051
pasta
2
0.0051
salmon
2
0.0051
salt
2
0.0051
wholeweatflour
2
0.0051
zucchini
2
0.0051
asparagus
1
0.0025
babiesfood
1
0.0025
blueberries
1
0.0025
burgersauce
1
0.0025
candybars
1
0.0025
chili
1
0.0025
chocolatebread
1
0.0025
cider
1
0.0025
fromageblanc
1
0.0025
greengrapes
1
0.0025
ketchup
1
0.0025
lightcream
1
0.0025
mashedpotato
1
0.0025
mintgreentea
1
0.0025
napkins
1
0.0025
nonfatmilk
1
0.0025
petfood
1
0.0025
soda
1
0.0025
tea
1
0.0025
## [1] "item_11"
## item_11
count
pct
lowfatyogurt
22
0.0859
greentea
20
0.0781
freshbread
14
0.0547
frenchfries
12
0.0469
lightmayo
9
0.0352
brownies
8
0.0312
frozensmoothie
8
0.0312
cake
7
0.0273
chicken
7
0.0273
cookingoil
7
0.0273
escalope
7
0.0273
pancakes
7
0.0273
chocolate
6
0.0234
eggs
6
0.0234
energydrink
6
0.0234
tomatojuice
6
0.0234
wholewheatrice
6
0.0234
champagne
5
0.0195
mint
5
0.0195
magazines
4
0.0156
proteinbar
4
0.0156
strawberries
4
0.0156
toothpaste
4
0.0156
yogurtcake
4
0.0156
cereals
3
0.0117
cookies
3
0.0117
hotdogs
3
0.0117
salmon
3
0.0117
babiesfood
2
0.0078
carrots
2
0.0078
cauliflower
2
0.0078
chili
2
0.0078
clothesaccessories
2
0.0078
cottagecheese
2
0.0078
eggplant
2
0.0078
extradarkchocolate
2
0.0078
gums
2
0.0078
honey
2
0.0078
ketchup
2
0.0078
melons
2
0.0078
nonfatmilk
2
0.0078
oil
2
0.0078
oliveoil
2
0.0078
salt
2
0.0078
sparklingwater
2
0.0078
whitewine
2
0.0078
asparagus
1
0.0039
barbecuesauce
1
0.0039
blacktea
1
0.0039
bodyspray
1
0.0039
bramble
1
0.0039
burgersauce
1
0.0039
candybars
1
0.0039
chocolatebread
1
0.0039
corn
1
0.0039
handproteinbar
1
0.0039
mayonnaise
1
0.0039
muffins
1
0.0039
mushroomcreamsauce
1
0.0039
rice
1
0.0039
salad
1
0.0039
sandwich
1
0.0039
spinach
1
0.0039
strongcheese
1
0.0039
tea
1
0.0039
wholeweatflour
1
0.0039
## [1] "item_12"
## item_12
count
pct
greentea
15
0.0974
frenchfries
10
0.0649
frozensmoothie
10
0.0649
lowfatyogurt
9
0.0584
freshbread
7
0.0455
pancakes
7
0.0455
tomatojuice
7
0.0455
honey
4
0.0260
mint
4
0.0260
cake
3
0.0195
cereals
3
0.0195
chocolate
3
0.0195
cookies
3
0.0195
cottagecheese
3
0.0195
energydrink
3
0.0195
escalope
3
0.0195
hotdogs
3
0.0195
lightmayo
3
0.0195
magazines
3
0.0195
mayonnaise
3
0.0195
melons
3
0.0195
mushroomcreamsauce
3
0.0195
proteinbar
3
0.0195
whitewine
3
0.0195
brownies
2
0.0130
candybars
2
0.0130
champagne
2
0.0130
eggs
2
0.0130
extradarkchocolate
2
0.0130
ketchup
2
0.0130
salt
2
0.0130
shampoo
2
0.0130
wholeweatflour
2
0.0130
wholewheatrice
2
0.0130
blueberries
1
0.0065
bodyspray
1
0.0065
chicken
1
0.0065
cookingoil
1
0.0065
corn
1
0.0065
eggplant
1
0.0065
glutenfreebar
1
0.0065
greengrapes
1
0.0065
ham
1
0.0065
mashedpotato
1
0.0065
rice
1
0.0065
strawberries
1
0.0065
strongcheese
1
0.0065
toothpaste
1
0.0065
vegetablesmix
1
0.0065
zucchini
1
0.0065
## [1] "item_13"
## item_13
count
pct
greentea
8
0.0920
freshbread
6
0.0690
lowfatyogurt
6
0.0690
escalope
4
0.0460
frenchfries
4
0.0460
tomatojuice
4
0.0460
brownies
3
0.0345
hotdogs
3
0.0345
melons
3
0.0345
salt
3
0.0345
energydrink
2
0.0230
frozensmoothie
2
0.0230
gums
2
0.0230
lightmayo
2
0.0230
mint
2
0.0230
nonfatmilk
2
0.0230
pancakes
2
0.0230
proteinbar
2
0.0230
sparklingwater
2
0.0230
yogurtcake
2
0.0230
asparagus
1
0.0115
barbecuesauce
1
0.0115
bugspray
1
0.0115
burgersauce
1
0.0115
cauliflower
1
0.0115
cereals
1
0.0115
champagne
1
0.0115
cookies
1
0.0115
cookingoil
1
0.0115
corn
1
0.0115
cottagecheese
1
0.0115
eggplant
1
0.0115
ham
1
0.0115
honey
1
0.0115
magazines
1
0.0115
mashedpotato
1
0.0115
oil
1
0.0115
salad
1
0.0115
tea
1
0.0115
toothpaste
1
0.0115
whitewine
1
0.0115
wholewheatrice
1
0.0115
zucchini
1
0.0115
## [1] "item_14"
## item_14
count
pct
greentea
4
0.0851
frenchfries
3
0.0638
frozensmoothie
3
0.0638
cottagecheese
2
0.0426
eggplant
2
0.0426
escalope
2
0.0426
freshbread
2
0.0426
glutenfreebar
2
0.0426
hotdogs
2
0.0426
lowfatyogurt
2
0.0426
mint
2
0.0426
oil
2
0.0426
pancakes
2
0.0426
salad
2
0.0426
toothpaste
2
0.0426
babiesfood
1
0.0213
cookies
1
0.0213
eggs
1
0.0213
handproteinbar
1
0.0213
lightmayo
1
0.0213
mineralwater
1
0.0213
muffins
1
0.0213
mushroomcreamsauce
1
0.0213
proteinbar
1
0.0213
salt
1
0.0213
strawberries
1
0.0213
tomatojuice
1
0.0213
yogurtcake
1
0.0213
## [1] "item_15"
## item_15
count
pct
magazines
3
0.12
freshbread
2
0.08
greentea
2
0.08
lowfatyogurt
2
0.08
pancakes
2
0.08
candybars
1
0.04
cereals
1
0.04
clothesaccessories
1
0.04
cookies
1
0.04
handproteinbar
1
0.04
hotdogs
1
0.04
mineralwater
1
0.04
mint
1
0.04
mushroomcreamsauce
1
0.04
salmon
1
0.04
tomatojuice
1
0.04
toothpaste
1
0.04
yogurtcake
1
0.04
zucchini
1
0.04
## [1] "item_16"
## item_16
count
pct
antioxydantjuice
1
0.125
cake
1
0.125
chocolate
1
0.125
frozensmoothie
1
0.125
magazines
1
0.125
proteinbar
1
0.125
salmon
1
0.125
sparklingwater
1
0.125
## [1] "item_17"
## item_17
count
pct
frozensmoothie
2
0.50
antioxydantjuice
1
0.25
frenchfries
1
0.25
## [1] "item_18"
## item_18
count
pct
proteinbar
2
0.50
frozensmoothie
1
0.25
spinach
1
0.25
## [1] "item_19"
## item_19
count
pct
cereals
1
0.3333
mayonnaise
1
0.3333
spinach
1
0.3333
## [1] "item_20"
## item_20
count
pct
oliveoil
1
1
## [1] "Done Processing"
## [1] "20 variables processed"
## [1] "Function Efficiency Statistics"
## user system elapsed
## 11.90 0.55 12.82
# Load the libraries
library(arules)
library(arulesViz)
#Set NA values on dataframe excluding the transaction_id
mb_data_no_id[mb_data_no_id==""]<-NA
#Rename dataframe
mb_data_ar <- mb_data_no_id
#Run Apriori to generate rules
rules <- apriori(mb_data_ar, parameter = list(supp = 0.001, conf = 0.8))
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.8 0.1 1 none FALSE TRUE 5 0.001 1
## maxlen target ext
## 10 rules TRUE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 7
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[1268 item(s), 7501 transaction(s)] done [0.01s].
## sorting and recoding items ... [608 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 done [0.00s].
## writing ... [35 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
## For better comparison we sort the rules by confidence and add Bayardo's improvement.
## https://stackoverflow.com/questions/38796256/association-rule-in-r-removing-redundant-rule-arules/38809330
## https://cran.r-project.org/web/packages/arules/arules.pdf
## Bayardo's rule: The improvement of a rule is the minimum difference between its confidence and the confidence of any more general rule (i.e., a rule with the same consequent but one or more items removed in the LHS).
rules <- sort(rules, by="confidence", decreasing=TRUE)
quality(rules)$improvement <- interestMeasure(rules, measure = "improvement")
options(digits=2)
#Remove duplicate rules
#https://stackoverflow.com/questions/47928125/removing-inverted-reverse-duplicate-rules-from-apriori-result-in-r
generating_itemsets <- generatingItemsets(rules)
duplicated_generating_itemsets <- which(duplicated(generating_itemsets))
rules <- rules[-duplicated_generating_itemsets]
#Put non-redundant rules into dataframe and display
df_inspect_rules = data.frame(
antecedent_lhs = labels(lhs(rules)),
consequent_rhs = labels(rhs(rules)),
rules@quality)
#Add rule label
df_inspect_rules$rule <- seq.int(nrow(df_inspect_rules))
#Move rule label to front of table
df_inspect_rules <- df_inspect_rules %>%
select(rule, everything())
#Round rules table to 3 digits
df_inspect_rules <- df_inspect_rules %>%
mutate_if(is.numeric, round, digits=4)
#Display table
datatable(df_inspect_rules, extensions = 'FixedColumns',
rownames=FALSE,
class = 'cell-border stripe compact hover',
caption = "Association Rules Results",
options = list(pageLength = 50,
autoWidth = TRUE,
dom = 'Bfrtip',
scrollX = TRUE,
fixedColumns=list(leftColumns = 3)))
#Apriori Summary
summary(rules)
## set of 31 rules
##
## rule length distribution (lhs + rhs):sizes
## 2 3 4
## 2 27 2
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2 3 3 3 3 4
##
## summary of quality measures:
## support confidence coverage lift count
## Min. :0.0011 Min. :0.80 Min. :0.0011 Min. : 14 Min. : 8
## 1st Qu.:0.0011 1st Qu.:0.83 1st Qu.:0.0012 1st Qu.: 22 1st Qu.: 8
## Median :0.0012 Median :0.91 Median :0.0015 Median : 40 Median : 9
## Mean :0.0018 Mean :0.90 Mean :0.0020 Mean : 56 Mean :13
## 3rd Qu.:0.0016 3rd Qu.:0.97 3rd Qu.:0.0017 3rd Qu.: 78 3rd Qu.:12
## Max. :0.0099 Max. :1.00 Max. :0.0104 Max. :375 Max. :74
## improvement
## Min. :-0.0202
## 1st Qu.: Inf
## Median : Inf
## Mean : Inf
## 3rd Qu.: Inf
## Max. : Inf
##
## mining info:
## data ntransactions support confidence
## mb_data_ar 7501 0.001 0.8
#https://www.rdocumentation.org/packages/arulesViz/versions/1.3-3/topics/plotly_arules
# interactive scatter plot visualizations
#Confidence vs. Support by Lift
plotly_arules(rules,
jitter = 20,
marker = list(opacity = .7, size = 8, symbol = 4),
colors = c("blue", "red"))
#Lift vs. Support by Lift
plotly_arules(rules,
measure = c("support", "lift"),
jitter = 20,
marker = list(opacity = .7, size = 8, symbol = 4),
colors = c("blue", "red"))
#Lift vs. Confidence by Lift
plotly_arules(rules,
measure = c("confidence", "lift"),
jitter = 20,
marker = list(opacity = .7, size = 8, symbol = 4),
colors = c("blue", "red"))
# save plot as a html page, embed in iframe
p <- plot(rules,method="graph",engine="htmlwidget", shading="lift")
htmlwidgets::saveWidget(p, "arules.html", selfcontained = TRUE)
#browseURL("arules.html")
htmltools::tags$iframe(width=900,
height=600,
title = "Association Rules",
src = "arules.html",
scrolling="no",
style="border:1px solid black;")
# https://cran.r-project.org/web/packages/arulesViz/arulesViz.pdf
# Represents the rules (or itemsets) as a graph with items as labeled vertices, and rules (or
# itemsets) represented as vertices connected to items using arrows. For rules, the LHS items
# are connected with arrows pointing to the vertex representing the rule and the RHS has an
# arrow pointing to the item.
Understand a grocery store’s customer purchasing behavior though the Apriori algorithm to create Association Rules, also known as Market Basket analysis, and provide actionable recommendations.
The Market Basket analysis results revealed 31 rules with high confidence (> .8). Of those rules, eight are selected on a lift basis for store management to consider for placement strategy. The eight rules are formed into two groups for this analysis: “Budget Dinners” and “Fancy Dinners”.
There are many potential uses for these results. The rules identify which items should be placed near each other on a shelf to tempt likely buyers into purchasing additional items. Discounting tactics could be used on items identified in the rules. Buyers who purchase one of the rule items could be targeted with advertisements for the other items contained in the rule.
The two highest-impact Association Rules groups selected clustered around an approximate lift value of 100. This can be interpreted as a customer is 100 times more likely to buy the consequent (then statement) when the antecedent (if statement) is present.
The Budget Dinners show shoppers are more likely to buy spaghetti at varying lift degrees if mineral water, milk, frozen vegetables, ground beef and burgers are present in the transaction. The Fancy Dinners show transactions grouping on three consequent items: escalope, frozen vegetables and mineral water. The antecedent items associated with the Fancy Dinners group are pasta, mushroom cream sauce, french fries, shrimp, tomatoes, spaghetti and chocolate.
The confidence in the Fancy Dinners group, for example, shows a transaction containing pasta and mushroom cream sauce is 100% more likely to contain escalope. This conclusion may change in a dataset with higher support for this specific rule.
The rule with the highest support, ‘If burgers, then turkey’, indicates the store sells burgers and turkey together at a high frequency (rule appears in .0099% of all transactions, 79 instances).
The set parameters for the Apriori algorithm returned results with support greater than .001 and confidence greater than .8.
The evaluation measure for recommendation is lift. Confidence is used as a secondary measure followed by support. This results in the identification of three separate rule groups. The first rule, ‘If olive oil then milk’, has the highest lift (375) but is discarded from recommendation due to a relative lack of support. It is recommended to revisit this conclusion in a future analysis run against more transactions. The other two rule groups are identified near lift values of 100. The primary separator between the Budget Dinners and the Fancy Dinners groups are confidence. The Fancy Dinners display higher confidence (>.9) compared to the Budget Dinners (<.9).
This interactive chart represents the association rules as a graph with items as labeled vertices, and rules represented as vertices connected to items using arrows. For rules, the antecedent items are connected with arrows pointing to the vertex representing the rule and the consequent has an arrow pointing to the item.
Sales Rank | Item | Count | Pct of Items Sold |
---|---|---|---|
1 | mineralwater | 1788 | .0609 |
2 | eggs | 1348 | .0459 |
3 | spaghetti | 1306 | .0445 |
4 | frenchfries | 1282 | .0437 |
5 | chocolate | 1230 | .0419 |
6 | greentea | 991 | .0337 |
7 | milk | 972 | .0331 |
8 | groundbeef | 737 | .0251 |
9 | frozenvegetables | 715 | .0244 |
10 | pancakes | 713 | .0243 |
… | … | … | … |
All Items | All Items | 29,363 | 1 |
#Time Script
end.time <- Sys.time()
elapsed.time <- round((end.time - start.time), 3)
paste0("Elapsed Time is : ", elapsed.time )
## [1] "Elapsed Time is : 24.649"