Association Rules

The association rule extraction algorithm is included in the arules package. The supermarket transaction data used for the market basket analysis can be found on the Grocery Shopping datasets page of ACM RecSys. More specifically, we will use the Belgian retail market dataset.

library(arules)
# Download the retail basket data (once) and load it as transactions.
fileURL <- "http://fimi.ua.ac.be/data/retail.dat.gz"
destFile <- "retail.data.gz"
# Skip the download when a local copy already exists; delete the file to force
# a refresh. The default download method is used instead of method = "curl" so
# that no external curl binary is required, and mode = "wb" keeps the gzip
# archive byte-exact on Windows.
if (!file.exists(destFile)) {
  download.file(fileURL, destfile = destFile, mode = "wb")
}
# Read the data in basket format: one transaction per line, with the items
# separated by a single space.
trans <- read.transactions(destFile, format = "basket", sep = " ")
# Show the first ten transactions.
inspect(trans[1:10])
##      items
## [1]  {0,  
##       1,  
##       10, 
##       11, 
##       12, 
##       13, 
##       14, 
##       15, 
##       16, 
##       17, 
##       18, 
##       19, 
##       2,  
##       20, 
##       21, 
##       22, 
##       23, 
##       24, 
##       25, 
##       26, 
##       27, 
##       28, 
##       29, 
##       3,  
##       4,  
##       5,  
##       6,  
##       7,  
##       8,  
##       9}  
## [2]  {30, 
##       31, 
##       32} 
## [3]  {33, 
##       34, 
##       35} 
## [4]  {36, 
##       37, 
##       38, 
##       39, 
##       40, 
##       41, 
##       42, 
##       43, 
##       44, 
##       45, 
##       46} 
## [5]  {38, 
##       39, 
##       47, 
##       48} 
## [6]  {38, 
##       39, 
##       48, 
##       49, 
##       50, 
##       51, 
##       52, 
##       53, 
##       54, 
##       55, 
##       56, 
##       57, 
##       58} 
## [7]  {32, 
##       41, 
##       59, 
##       60, 
##       61, 
##       62} 
## [8]  {3,  
##       39, 
##       48} 
## [9]  {63, 
##       64, 
##       65, 
##       66, 
##       67, 
##       68} 
## [10] {32, 
##       69}
# Print an overview of the transactions object: its dimensions and density,
# the most frequent items, and the distribution of transaction lengths.
summary(trans)
## transactions as itemMatrix in sparse format with
##  88162 rows (elements/itemsets/transactions) and
##  16470 columns (items) and a density of 0.0006257289 
## 
## most frequent items:
##      39      48      38      32      41 (Other) 
##   50675   42135   15596   15167   14945  770058 
## 
## element (itemset/transaction) length distribution:
## sizes
##    1    2    3    4    5    6    7    8    9   10   11   12   13   14   15 
## 3016 5516 6919 7210 6814 6163 5746 5143 4660 4086 3751 3285 2866 2620 2310 
##   16   17   18   19   20   21   22   23   24   25   26   27   28   29   30 
## 2115 1874 1645 1469 1290 1205  981  887  819  684  586  582  472  480  355 
##   31   32   33   34   35   36   37   38   39   40   41   42   43   44   45 
##  310  303  272  234  194  136  153  123  115  112   76   66   71   60   50 
##   46   47   48   49   50   51   52   53   54   55   56   57   58   59   60 
##   44   37   37   33   22   24   21   21   10   11   10    9   11    4    9 
##   61   62   63   64   65   66   67   68   71   73   74   76 
##    7    4    5    2    2    5    3    3    1    1    1    1 
## 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1.00    4.00    8.00   10.31   14.00   76.00 
## 
## includes extended item information - examples:
##   labels
## 1      0
## 2      1
## 3     10

After successfully reading the transactions, we proceed with the analysis:

# Mine association rules with Apriori, using a minimum support of 0.01 (1% of
# the transactions) and a minimum confidence of 0.6.
rules <- apriori(
  trans,
  parameter = list(support = 0.01, confidence = 0.6)
)
## Apriori
## 
## Parameter specification:
##  confidence minval smax arem  aval originalSupport maxtime support minlen
##         0.6    0.1    1 none FALSE            TRUE       5    0.01      1
##  maxlen target   ext
##      10  rules FALSE
## 
## Algorithmic control:
##  filter tree heap memopt load sort verbose
##     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
## 
## Absolute minimum support count: 881 
## 
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[16470 item(s), 88162 transaction(s)] done [0.22s].
## sorting and recoding items ... [70 item(s)] done [0.01s].
## creating transaction tree ... done [0.06s].
## checking subsets of size 1 2 3 4 done [0.01s].
## writing ... [84 rule(s)] done [0.00s].
## creating S4 object  ... done [0.03s].
# Round every quality measure of the rules to three decimals, then print the
# rule set.
quality(rules) <- round(quality(rules), digits = 3)
rules
## set of 84 rules

84 rules were generated. To see them, use inspect().

# List every rule (lhs => rhs) together with its quality measures.
inspect(rules)
##      lhs            rhs  support confidence lift  count
## [1]  {37}        => {38} 0.012   0.974      5.505  1046
## [2]  {286}       => {38} 0.013   0.943      5.333  1116
## [3]  {12925}     => {39} 0.011   0.639      1.112   938
## [4]  {1146}      => {39} 0.011   0.689      1.199   983
## [5]  {79}        => {39} 0.013   0.694      1.208  1111
## [6]  {1327}      => {39} 0.013   0.647      1.126  1156
## [7]  {438}       => {39} 0.014   0.676      1.177  1260
## [8]  {60}        => {39} 0.011   0.660      1.149   983
## [9]  {255}       => {48} 0.012   0.717      1.500  1057
## [10] {255}       => {39} 0.012   0.717      1.248  1057
## [11] {533}       => {39} 0.010   0.620      1.079   922
## [12] {270}       => {39} 0.014   0.689      1.198  1194
## [13] {2238}      => {39} 0.015   0.750      1.306  1287
## [14] {110}       => {38} 0.031   0.975      5.513  2725
## [15] {110}       => {39} 0.020   0.630      1.095  1759
## [16] {147}       => {39} 0.013   0.639      1.112  1137
## [17] {271}       => {39} 0.016   0.685      1.191  1434
## [18] {413}       => {48} 0.013   0.604      1.263  1135
## [19] {413}       => {39} 0.013   0.601      1.046  1130
## [20] {36}        => {38} 0.032   0.950      5.372  2790
## [21] {36}        => {39} 0.023   0.694      1.207  2037
## [22] {475}       => {48} 0.016   0.659      1.379  1428
## [23] {475}       => {39} 0.017   0.692      1.204  1500
## [24] {170}       => {38} 0.034   0.978      5.529  3031
## [25] {170}       => {39} 0.023   0.664      1.156  2059
## [26] {101}       => {39} 0.016   0.626      1.089  1400
## [27] {310}       => {48} 0.019   0.652      1.365  1692
## [28] {310}       => {39} 0.021   0.714      1.242  1852
## [29] {237}       => {39} 0.022   0.636      1.107  1929
## [30] {225}       => {39} 0.027   0.722      1.256  2351
## [31] {89}        => {48} 0.032   0.729      1.526  2798
## [32] {89}        => {39} 0.031   0.716      1.246  2749
## [33] {65}        => {39} 0.032   0.623      1.084  2787
## [34] {38}        => {39} 0.117   0.663      1.154 10345
## [35] {41}        => {48} 0.102   0.603      1.263  9018
## [36] {41}        => {39} 0.129   0.764      1.329 11414
## [37] {48}        => {39} 0.331   0.692      1.203 29142
## [38] {110,48}    => {38} 0.015   0.986      5.575  1361
## [39] {110,38}    => {39} 0.020   0.639      1.111  1740
## [40] {110,39}    => {38} 0.020   0.989      5.592  1740
## [41] {110,48}    => {39} 0.012   0.751      1.307  1037
## [42] {36,48}     => {38} 0.015   0.960      5.429  1360
## [43] {36,38}     => {39} 0.022   0.697      1.213  1945
## [44] {36,39}     => {38} 0.022   0.955      5.398  1945
## [45] {36,48}     => {39} 0.013   0.788      1.371  1116
## [46] {475,48}    => {39} 0.012   0.765      1.330  1092
## [47] {39,475}    => {48} 0.012   0.728      1.523  1092
## [48] {170,48}    => {38} 0.017   0.988      5.584  1538
## [49] {170,38}    => {39} 0.023   0.666      1.159  2019
## [50] {170,39}    => {38} 0.023   0.981      5.543  2019
## [51] {170,48}    => {39} 0.014   0.775      1.348  1206
## [52] {101,48}    => {39} 0.011   0.722      1.255   946
## [53] {101,39}    => {48} 0.011   0.676      1.414   946
## [54] {310,48}    => {39} 0.015   0.796      1.385  1347
## [55] {310,39}    => {48} 0.015   0.727      1.522  1347
## [56] {237,48}    => {39} 0.014   0.740      1.287  1244
## [57] {237,39}    => {48} 0.014   0.645      1.349  1244
## [58] {225,48}    => {39} 0.016   0.806      1.403  1400
## [59] {48,89}     => {39} 0.024   0.759      1.321  2125
## [60] {39,89}     => {48} 0.024   0.773      1.617  2125
## [61] {48,65}     => {39} 0.020   0.711      1.236  1797
## [62] {39,65}     => {48} 0.020   0.645      1.349  1797
## [63] {32,38}     => {39} 0.021   0.649      1.130  1840
## [64] {38,41}     => {48} 0.027   0.609      1.275  2374
## [65] {38,41}     => {39} 0.035   0.783      1.362  3051
## [66] {38,48}     => {39} 0.069   0.768      1.336  6102
## [67] {32,41}     => {48} 0.023   0.645      1.351  2063
## [68] {32,41}     => {39} 0.027   0.738      1.284  2359
## [69] {32,48}     => {39} 0.061   0.672      1.170  5402
## [70] {32,39}     => {48} 0.061   0.639      1.337  5402
## [71] {41,48}     => {39} 0.084   0.817      1.421  7366
## [72] {39,41}     => {48} 0.084   0.645      1.350  7366
## [73] {110,38,48} => {39} 0.012   0.758      1.318  1031
## [74] {110,39,48} => {38} 0.012   0.994      5.620  1031
## [75] {36,38,48}  => {39} 0.012   0.794      1.382  1080
## [76] {36,39,48}  => {38} 0.012   0.968      5.471  1080
## [77] {170,38,48} => {39} 0.014   0.776      1.349  1193
## [78] {170,39,48} => {38} 0.014   0.989      5.592  1193
## [79] {32,38,48}  => {39} 0.014   0.751      1.306  1236
## [80] {32,38,39}  => {48} 0.014   0.672      1.406  1236
## [81] {38,41,48}  => {39} 0.023   0.839      1.459  1991
## [82] {38,39,41}  => {48} 0.023   0.653      1.365  1991
## [83] {32,41,48}  => {39} 0.019   0.798      1.388  1646
## [84] {32,39,41}  => {48} 0.019   0.698      1.460  1646

For much more information, see the "Introduction to arules" vignette.