The association rule extraction algorithm is included in the arules
package. The supermarket transaction data we will use for the market basket analysis can be found on the Grocery Shopping datasets page of ACM RecSys. More specifically, we will use the Belgium retail market dataset.
library(arules)
fileURL <- "http://fimi.ua.ac.be/data/retail.dat.gz"
download.file(fileURL, destfile="retail.data.gz", method="curl")
# Read the data in basket format
trans <- read.transactions("retail.data.gz", format = "basket", sep = " ")
inspect(trans[1:10])
## items
## [1] {0,
## 1,
## 10,
## 11,
## 12,
## 13,
## 14,
## 15,
## 16,
## 17,
## 18,
## 19,
## 2,
## 20,
## 21,
## 22,
## 23,
## 24,
## 25,
## 26,
## 27,
## 28,
## 29,
## 3,
## 4,
## 5,
## 6,
## 7,
## 8,
## 9}
## [2] {30,
## 31,
## 32}
## [3] {33,
## 34,
## 35}
## [4] {36,
## 37,
## 38,
## 39,
## 40,
## 41,
## 42,
## 43,
## 44,
## 45,
## 46}
## [5] {38,
## 39,
## 47,
## 48}
## [6] {38,
## 39,
## 48,
## 49,
## 50,
## 51,
## 52,
## 53,
## 54,
## 55,
## 56,
## 57,
## 58}
## [7] {32,
## 41,
## 59,
## 60,
## 61,
## 62}
## [8] {3,
## 39,
## 48}
## [9] {63,
## 64,
## 65,
## 66,
## 67,
## 68}
## [10] {32,
## 69}
summary(trans)
## transactions as itemMatrix in sparse format with
## 88162 rows (elements/itemsets/transactions) and
## 16470 columns (items) and a density of 0.0006257289
##
## most frequent items:
## 39 48 38 32 41 (Other)
## 50675 42135 15596 15167 14945 770058
##
## element (itemset/transaction) length distribution:
## sizes
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
## 3016 5516 6919 7210 6814 6163 5746 5143 4660 4086 3751 3285 2866 2620 2310
## 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
## 2115 1874 1645 1469 1290 1205 981 887 819 684 586 582 472 480 355
## 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45
## 310 303 272 234 194 136 153 123 115 112 76 66 71 60 50
## 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
## 44 37 37 33 22 24 21 21 10 11 10 9 11 4 9
## 61 62 63 64 65 66 67 68 71 73 74 76
## 7 4 5 2 2 5 3 3 1 1 1 1
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.00 4.00 8.00 10.31 14.00 76.00
##
## includes extended item information - examples:
## labels
## 1 0
## 2 1
## 3 10
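The summary already lists the most frequent items (39, 48, 38, 32 and 41). For a quick visual check of the same information, the arules function itemFrequencyPlot() plots the relative support of the top items; a minimal sketch using the trans object created above:
# Plot the relative support of the 10 most frequent items
itemFrequencyPlot(trans, topN = 10)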
After successfully reading the transactions, we go ahead with our analysis:
rules <- apriori(trans, parameter = list(support = 0.01, confidence = 0.6))
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.6 0.1 1 none FALSE TRUE 5 0.01 1
## maxlen target ext
## 10 rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 881
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[16470 item(s), 88162 transaction(s)] done [0.22s].
## sorting and recoding items ... [70 item(s)] done [0.01s].
## creating transaction tree ... done [0.06s].
## checking subsets of size 1 2 3 4 done [0.01s].
## writing ... [84 rule(s)] done [0.00s].
## creating S4 object ... done [0.03s].
quality(rules) <- round(quality(rules), digits=3)
rules
## set of 84 rules
84 rules were generated. To see them, use inspect().
inspect(rules)
## lhs rhs support confidence lift count
## [1] {37} => {38} 0.012 0.974 5.505 1046
## [2] {286} => {38} 0.013 0.943 5.333 1116
## [3] {12925} => {39} 0.011 0.639 1.112 938
## [4] {1146} => {39} 0.011 0.689 1.199 983
## [5] {79} => {39} 0.013 0.694 1.208 1111
## [6] {1327} => {39} 0.013 0.647 1.126 1156
## [7] {438} => {39} 0.014 0.676 1.177 1260
## [8] {60} => {39} 0.011 0.660 1.149 983
## [9] {255} => {48} 0.012 0.717 1.500 1057
## [10] {255} => {39} 0.012 0.717 1.248 1057
## [11] {533} => {39} 0.010 0.620 1.079 922
## [12] {270} => {39} 0.014 0.689 1.198 1194
## [13] {2238} => {39} 0.015 0.750 1.306 1287
## [14] {110} => {38} 0.031 0.975 5.513 2725
## [15] {110} => {39} 0.020 0.630 1.095 1759
## [16] {147} => {39} 0.013 0.639 1.112 1137
## [17] {271} => {39} 0.016 0.685 1.191 1434
## [18] {413} => {48} 0.013 0.604 1.263 1135
## [19] {413} => {39} 0.013 0.601 1.046 1130
## [20] {36} => {38} 0.032 0.950 5.372 2790
## [21] {36} => {39} 0.023 0.694 1.207 2037
## [22] {475} => {48} 0.016 0.659 1.379 1428
## [23] {475} => {39} 0.017 0.692 1.204 1500
## [24] {170} => {38} 0.034 0.978 5.529 3031
## [25] {170} => {39} 0.023 0.664 1.156 2059
## [26] {101} => {39} 0.016 0.626 1.089 1400
## [27] {310} => {48} 0.019 0.652 1.365 1692
## [28] {310} => {39} 0.021 0.714 1.242 1852
## [29] {237} => {39} 0.022 0.636 1.107 1929
## [30] {225} => {39} 0.027 0.722 1.256 2351
## [31] {89} => {48} 0.032 0.729 1.526 2798
## [32] {89} => {39} 0.031 0.716 1.246 2749
## [33] {65} => {39} 0.032 0.623 1.084 2787
## [34] {38} => {39} 0.117 0.663 1.154 10345
## [35] {41} => {48} 0.102 0.603 1.263 9018
## [36] {41} => {39} 0.129 0.764 1.329 11414
## [37] {48} => {39} 0.331 0.692 1.203 29142
## [38] {110,48} => {38} 0.015 0.986 5.575 1361
## [39] {110,38} => {39} 0.020 0.639 1.111 1740
## [40] {110,39} => {38} 0.020 0.989 5.592 1740
## [41] {110,48} => {39} 0.012 0.751 1.307 1037
## [42] {36,48} => {38} 0.015 0.960 5.429 1360
## [43] {36,38} => {39} 0.022 0.697 1.213 1945
## [44] {36,39} => {38} 0.022 0.955 5.398 1945
## [45] {36,48} => {39} 0.013 0.788 1.371 1116
## [46] {475,48} => {39} 0.012 0.765 1.330 1092
## [47] {39,475} => {48} 0.012 0.728 1.523 1092
## [48] {170,48} => {38} 0.017 0.988 5.584 1538
## [49] {170,38} => {39} 0.023 0.666 1.159 2019
## [50] {170,39} => {38} 0.023 0.981 5.543 2019
## [51] {170,48} => {39} 0.014 0.775 1.348 1206
## [52] {101,48} => {39} 0.011 0.722 1.255 946
## [53] {101,39} => {48} 0.011 0.676 1.414 946
## [54] {310,48} => {39} 0.015 0.796 1.385 1347
## [55] {310,39} => {48} 0.015 0.727 1.522 1347
## [56] {237,48} => {39} 0.014 0.740 1.287 1244
## [57] {237,39} => {48} 0.014 0.645 1.349 1244
## [58] {225,48} => {39} 0.016 0.806 1.403 1400
## [59] {48,89} => {39} 0.024 0.759 1.321 2125
## [60] {39,89} => {48} 0.024 0.773 1.617 2125
## [61] {48,65} => {39} 0.020 0.711 1.236 1797
## [62] {39,65} => {48} 0.020 0.645 1.349 1797
## [63] {32,38} => {39} 0.021 0.649 1.130 1840
## [64] {38,41} => {48} 0.027 0.609 1.275 2374
## [65] {38,41} => {39} 0.035 0.783 1.362 3051
## [66] {38,48} => {39} 0.069 0.768 1.336 6102
## [67] {32,41} => {48} 0.023 0.645 1.351 2063
## [68] {32,41} => {39} 0.027 0.738 1.284 2359
## [69] {32,48} => {39} 0.061 0.672 1.170 5402
## [70] {32,39} => {48} 0.061 0.639 1.337 5402
## [71] {41,48} => {39} 0.084 0.817 1.421 7366
## [72] {39,41} => {48} 0.084 0.645 1.350 7366
## [73] {110,38,48} => {39} 0.012 0.758 1.318 1031
## [74] {110,39,48} => {38} 0.012 0.994 5.620 1031
## [75] {36,38,48} => {39} 0.012 0.794 1.382 1080
## [76] {36,39,48} => {38} 0.012 0.968 5.471 1080
## [77] {170,38,48} => {39} 0.014 0.776 1.349 1193
## [78] {170,39,48} => {38} 0.014 0.989 5.592 1193
## [79] {32,38,48} => {39} 0.014 0.751 1.306 1236
## [80] {32,38,39} => {48} 0.014 0.672 1.406 1236
## [81] {38,41,48} => {39} 0.023 0.839 1.459 1991
## [82] {38,39,41} => {48} 0.023 0.653 1.365 1991
## [83] {32,41,48} => {39} 0.019 0.798 1.388 1646
## [84] {32,39,41} => {48} 0.019 0.698 1.460 1646
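Eighty-four rules are a lot to scan by eye. As a minimal sketch of how to narrow the list down, we can reuse the rules object above together with the sort(), head() and subset() methods that arules provides for rule sets:
# Show the five rules with the highest lift
inspect(head(sort(rules, by = "lift"), 5))
# Keep only the rules with lift above 5 (in this output they all have {38} on the right-hand side)
inspect(subset(rules, subset = lift > 5))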
For much more information, check the Introduction to arules vignette.