Recommenders in R, Comparing Multiple Algorithms

We already know several essential recommendation methods. If we want to recommend a book to ourselves, we can do it:
1. based on our own experience;
2. based on our friends' experience;
3. based on the catalog of the library;
4. based on the results of a search engine.
We already talked a little about the first method here: https://charleshsliao.wordpress.com/2017/03/06/an-quick-association-rules-example-within-r/

These are the basic methods behind recommending items to customers. Below we build them in R with the recommenderlab package and compare the underlying algorithms in detail.

###1. Load the library and the MovieLense data

library(recommenderlab) # the MovieLense data ships with recommenderlab
data(MovieLense)
image(MovieLense) # raster plot of the raw rating matrix
hist(getRatings(normalize(MovieLense))) # distribution of the normalized ratings

[Figure: raster image of the raw MovieLense rating matrix]

[Figure: histogram of the normalized ratings]
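Before modeling, it is also worth checking the size and sparsity of the data. A quick sketch (dim(), nratings() and getRatings() are standard recommenderlab/base R calls):

dim(MovieLense)                 # users x movies
nratings(MovieLense)            # total number of stored ratings
summary(getRatings(MovieLense)) # distribution of the raw ratings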

###2. Check the recommender methods registered in recommenderlab for rating matrix data

recommenderRegistry$get_entry_names()
#[1] "ALS_realRatingMatrix"            "ALS_implicit_realRatingMatrix"
#[3] "ALS_implicit_binaryRatingMatrix" "AR_binaryRatingMatrix"
#[5] "IBCF_binaryRatingMatrix"         "IBCF_realRatingMatrix"
#[7] "POPULAR_binaryRatingMatrix"      "POPULAR_realRatingMatrix"
#[9] "RANDOM_realRatingMatrix"         "RANDOM_binaryRatingMatrix"
#[11] "RERECOMMEND_realRatingMatrix"    "SVD_realRatingMatrix"
#[13] "SVDF_realRatingMatrix"           "UBCF_binaryRatingMatrix"
#[15] "UBCF_realRatingMatrix"  
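Each registry entry also carries a description and the default parameters of the method. Assuming the registry's get_entry() interface (the registry object recommenderlab builds on), we can inspect a single entry:

recommenderRegistry$get_entry("IBCF",dataType="realRatingMatrix")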

###3. Train the recommenders

###IBCF: recommender based on item-based collaborative filtering
###We can also use getModel() to reveal the attributes of a trained model
ratings<-sample(MovieLense,900,replace=FALSE) # sample 900 of the 943 users
ratings_ibcf<-Recommender(ratings,"IBCF")
ratings_popu<-Recommender(ratings,"POPULAR")
names(getModel(ratings_ibcf))
#[1] "description"          "sim"                  "k"
#[4] "method"               "normalize"            "normalize_sim_matrix"
#[7] "alpha"                "na_as_zero"           "verbose"  
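These attributes are directly accessible. For example, sim holds the item-to-item similarity matrix and k the neighborhood size (both appear in the names above):

getModel(ratings_ibcf)$k        # neighborhood size used for prediction
dim(getModel(ratings_ibcf)$sim) # item-by-item similarity matrix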

###4. Make predictions with predict()

###4.1 Produce top-N recommendations of movies/items
pred_popu<-predict(ratings_popu,ratings[896:900,],n=5) # top 5 for the last 5 sampled users
as(pred_popu,"list") # show the recommended movies as lists
#$`76`
#[1] "Star Wars (1977)"               "Godfather, The (1972)"
#[3] "Raiders of the Lost Ark (1981)" "Titanic (1997)"
#[5] "Return of the Jedi (1983)"     #

#$`859`
#[1] "Star Wars (1977)"                 "Godfather, The (1972)"
#[3] "Fargo (1996)"                     "Raiders of the Lost Ark (1981)"
#[5] "Silence of the Lambs, The (1991)"#

#$`364`
#[1] "Star Wars (1977)"                 "Godfather, The (1972)"
#[3] "Fargo (1996)"                     "Raiders of the Lost Ark (1981)"
#[5] "Silence of the Lambs, The (1991)"#

#$`663`
#[1] "Empire Strikes Back, The (1980)" "L.A. Confidential (1997)"
#[3] "Casablanca (1942)"               "Braveheart (1995)"
#[5] "Amadeus (1984)"                 #

#$`283`
#[1] "Godfather, The (1972)"            "Raiders of the Lost Ark (1981)"
#[3] "Silence of the Lambs, The (1991)" "Schindler's List (1993)"
#[5] "Titanic (1997)"

Note that POPULAR recommends almost the same blockbusters to every user; IBCF below produces far more individualized lists.

pred_ibcf<-predict(ratings_ibcf,ratings[896:900,],n=5)
as(pred_ibcf,"list") # the top 5 recommended movies for each user
#$`76`
#[1] "Mr. Holland's Opus (1995)"
#[2] "To Wong Foo, Thanks for Everything! Julie Newmar (1995)"
#[3] "Eat Drink Man Woman (1994)"
#[4] "Lion King, The (1994)"
#[5] "Mask, The (1994)"                                       #

#$`859`
#[1] "Richard III (1995)"          "Hoop Dreams (1994)"          "Heavy Metal (1981)"
#[4] "Moll Flanders (1996)"        "Fish Called Wanda, A (1988)"#

#$`364`
#[1] "Wings of the Dove, The (1997)"
#[2] "Mulholland Falls (1996)"
#[3] "Michael (1996)"
#[4] "I Shot Andy Warhol (1996)"
#[5] "Rendezvous in Paris (Rendez-vous de Paris, Les) (1995)"#

#$`663`
#[1] "From Dusk Till Dawn (1996)"
#[2] "Brothers McMullen, The (1995)"
#[3] "To Wong Foo, Thanks for Everything! Julie Newmar (1995)"
#[4] "Madness of King George, The (1994)"
#[5] "Natural Born Killers (1994)"                            #

#$`283`
#[1] "Babe (1995)"               "Mr. Holland's Opus (1995)" "Taxi Driver (1976)"
#[4] "Batman Forever (1995)"     "Strange Days (1995)"      

pred_ibcf_first_3<-bestN(pred_ibcf,n=3) # bestN() keeps the 3 highest-ranked items from the top-5 lists
as(pred_ibcf_first_3,"list")
#$`76`
#[1] "Mr. Holland's Opus (1995)"
#[2] "To Wong Foo, Thanks for Everything! Julie Newmar (1995)"
#[3] "Eat Drink Man Woman (1994)"                             #

#$`859`
#[1] "Richard III (1995)" "Hoop Dreams (1994)" "Heavy Metal (1981)"#

#$`364`
#[1] "Wings of the Dove, The (1997)" "Mulholland Falls (1996)"
#[3] "Michael (1996)"               #

#$`663`
#[1] "From Dusk Till Dawn (1996)"
#[2] "Brothers McMullen, The (1995)"
#[3] "To Wong Foo, Thanks for Everything! Julie Newmar (1995)"#

#$`283`
#[1] "Babe (1995)"               "Mr. Holland's Opus (1995)" "Taxi Driver (1976)"  

###4.2 Predict the ratings
rate_ibcf<-predict(ratings_ibcf, ratings[896:900,],type="ratings")
as(rate_ibcf,"matrix")[,1:3] # predicted ratings for the first 3 movies
rate_popu<-predict(ratings_popu, ratings[896:900,],type="ratings")
as(rate_popu,"matrix")[,1:3]
#    Toy Story (1995) GoldenEye (1995) Four Rooms (1995)
#76          3.867774         3.311593          3.132167
#859         4.152952         3.596771                NA
#364         3.780483         3.224302          3.044876
#663               NA         3.285649                NA
#283         4.438874         3.882693          3.703267
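Note the NA entries: with type="ratings", predict() only fills in movies a user has not rated yet, so an NA marks an existing rating. We can confirm this against the raw matrix:

as(ratings[896:900,],"matrix")[,1:3] # the NAs above line up with these known ratings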

################################################################
####      Above is the basic usage of recommenderlab       ####
################################################################

###5. Rebuild the recommenders with training/test data and evaluate the models

###5.1 Build models to predict ratings
eva<-evaluationScheme(ratings[1:900,],method="split",train=0.7,given=15,goodRating=5)
r_ubcf<-Recommender(getData(eva,"train"),"UBCF")
p_ubcf<-predict(r_ubcf,getData(eva,"known"),type="ratings")
r_ibcf<-Recommender(getData(eva,"train"),"IBCF")
p_ibcf<-predict(r_ibcf,getData(eva,"known"),type="ratings")
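It helps to confirm what the scheme hands out: "train" holds 70% of the users, while each test user's ratings are split into "known" (the 15 given items) and "unknown" (the held-out remainder used for scoring). A quick check:

dim(getData(eva,"train"))        # ~630 training users
dim(getData(eva,"known"))        # test users, 15 given ratings each
nratings(getData(eva,"unknown")) # held-out ratings used for evaluation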

###5.2 Calculate the accuracy of the rating predictions
acc_ubcf<-calcPredictionAccuracy(p_ubcf,getData(eva,"unknown"))
acc_ibcf<-calcPredictionAccuracy(p_ibcf,getData(eva,"unknown"))
error_ratings<-rbind(acc_ubcf,acc_ibcf)
error_ratings
#          RMSE      MSE       MAE
#acc_ubcf 1.048826 1.100037 0.8371466
#acc_ibcf 1.272593 1.619494 0.9309019
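If we want the error per test user instead of one aggregate row, calcPredictionAccuracy() also accepts byUser=TRUE:

head(calcPredictionAccuracy(p_ubcf,getData(eva,"unknown"),byUser=TRUE))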

###5.3 Build recommenders to recommend movies/items
##Besides the split method, we can also use k-fold cross-validation and average over the folds
tops<-evaluationScheme(ratings[1:900,],method="cross-validation",k=10,given=3,goodRating=5)
##We evaluate the accuracy of the top-1, 3, 5 and 10 lists
results<-evaluate(tops,method="POPULAR",type="topNList",n=c(1,3,5,10))
getConfusionMatrix(results)[[1]] # confusion matrix of the first fold only
#          TP        FP       FN       TN precision     recall        TPR          FPR
#1  0.2777778 0.7222222 20.28889 1639.711 0.2777778 0.02046982 0.02046982 0.0004396207
#3  0.7666667 2.2333333 19.80000 1638.200 0.2555556 0.05339671 0.05339671 0.0013594868
#5  1.2111111 3.7888889 19.35556 1636.644 0.2422222 0.07050944 0.07050944 0.0023049373
#10 2.1222222 7.8777778 18.44444 1632.556 0.2122222 0.13780612 0.13780612 0.0047949666

avg(results) # averaged over all 10 folds
#          TP        FP       FN       TN precision     recall        TPR          FPR
#1  0.3422222 0.6577778 21.36111 1638.639 0.3422222 0.02364371 0.02364371 0.0003997357
#3  0.7755556 2.2244444 20.92778 1637.072 0.2585185 0.05169088 0.05169088 0.0013530345
#5  1.1466667 3.8533333 20.55667 1635.443 0.2293333 0.06967972 0.06967972 0.0023438332
#10 2.0688889 7.9311111 19.63444 1631.366 0.2068889 0.12795976 0.12795976 0.0048257459

plot(results,"prec/rec",annotate=TRUE)

[Figure: precision/recall curve for the POPULAR recommender]
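The default plot for top-N results is the ROC-style curve (TPR against FPR), as shown in the recommenderlab vignette:

plot(results,annotate=TRUE) # TPR vs. FPR across the four list lengths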


################################################################
####      6. Compare the algorithms of recommenders         ####
################################################################

###6.1 Compare algorithms for top-N recommendation
set.seed(2017)
scheme<-evaluationScheme(ratings,method="split",train=0.8,k=1,given=10,goodRating=5)
algorithms<-list("random items"=list(name="RANDOM",param=NULL),
                 "popular items"=list(name="POPULAR",param=list(normalize="Z-score")),
                 "user-based CF"=list(name="UBCF",param=list(normalize="Z-score",method="Cosine",nn=25)),
                 "item-based CF"=list(name="IBCF",param=list(k=50)),
                 "SVD approx"=list(name="SVD",param=NULL))
results_algs<-evaluate(scheme,algorithms,n=c(1,3,5,10,15,20))
plot(results_algs,annotate=c(1,3),legend="topleft") # ROC curves
plot(results_algs,"prec/rec",annotate=c(2,3,4),legend="bottomright")

###6.2 Compare algorithms for rating prediction
results_ratings<-evaluate(scheme,algorithms,type="ratings")
plot(results_ratings,ylim=c(0,3)) # RMSE/MSE/MAE per algorithm

[Figure: ROC curves for the five algorithms]
[Figure: precision/recall curves for the five algorithms]
[Figure: rating-prediction errors (RMSE/MSE/MAE) per algorithm]
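To read the exact error numbers behind the bar plot, we can pull the averaged results of a single algorithm out of the result list, indexed by the names defined above:

avg(results_ratings[["user-based CF"]]) # RMSE, MSE and MAE for UBCF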
