본문 바로가기
대학교/3.데이터마이닝

맥주 추천 시스템

by Jcoder 2019. 1. 3.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# Ninth code snippet
library('reshape')
library('foreign')
library('ggplot2')
 
# Eighth code snippet
# Load the raw (User, Beer, Like) records. file.path() takes the directory
# and the file name as separate arguments; the original was missing the
# comma between them, which is a parse error.
installations <- read.csv(file.path('data', 'installations.csv'))
head(installations)

# Reshape the long records into a wide table: one row per User, one column
# per Beer, filled with the 'Like' value.
user.package.matrix <- cast(installations, User ~ Beer, value = 'Like')

# Inspect the first two columns; the first one holds the User identifiers.
user.package.matrix[, 1]
user.package.matrix[, 2]

# Promote the User column to row names ...
row.names(user.package.matrix) <- user.package.matrix[, 1]

# ... and drop it so that only the Beer columns remain.
user.package.matrix <- user.package.matrix[, -1]
 
# Tenth code snippet
# Pairwise correlation between the columns of user.package.matrix, giving a
# square, symmetric similarity matrix.
similarities <- cor(user.package.matrix)

nrow(similarities)
ncol(similarities)
# Spot-check two entries by [row, column]. The original `similarities[11]` /
# `similarities[12]` had lost the comma and would have used linear indexing
# into an arbitrary cell instead.
similarities[1, 1]
similarities[1, 2]
 
# Eleventh code snippet
# Turn correlations into distances: rescale [-1, 1] to [0, 1], then take
# -log so that a correlation of 1 maps to distance 0 and a correlation of -1
# maps to Inf. (The original line had an unbalanced parenthesis.)
distances <- -log((similarities / 2) + 0.5)
 
# Twelfth code snippet
# Indices of the k items closest to item `i`.
#
# i         Row index of the item of interest in `distances`.
# distances Square matrix of pairwise distances.
# k         Number of neighbours to return (default 5).
#
# Returns positions 2 through k + 1 of the ascending ordering of row `i`;
# position 1 is skipped on the expectation that it is the item itself.
k.nearest.neighbors <- function(i, distances, k = 5)
{
  ranked <- order(distances[i, ])
  wanted <- 2:(k + 1)
  ranked[wanted]
}
 
# Thirteenth code snippet
# Estimated preference of `user` for `package`: the mean of the user's
# entries in user.package.matrix over the k nearest neighbours of `package`.
#
# user                Row index (or name) of the user in user.package.matrix.
# package             Index of the item in `distances`.
# user.package.matrix User x item table of preference values.
# distances           Square item x item distance matrix.
# k                   Number of neighbours to average over (default 5).
installation.probability <- function(user, package, user.package.matrix, distances, k = 5)
{
  # Forward the caller's k; the original hard-coded k = 5 here, so the `k`
  # argument of this function was silently ignored.
  neighbors <- k.nearest.neighbors(package, distances, k = k)
  mean(sapply(neighbors, function(neighbor) {
    user.package.matrix[user, neighbor]
  }))
}
 
# Example: score for user 1 and item 1. The original call read
# `installation.probability(11, user.package.matrix, distances)`, which
# shifts every argument by one position and leaves `distances` missing.
installation.probability(1, 1, user.package.matrix, distances)
 
# Fourteenth code snippet
# Rank every item for `user`, best first.
#
# user                Row index (or name) of the user in user.package.matrix.
# user.package.matrix User x item table of preference values.
# distances           Square item x item distance matrix.
# k                   Number of neighbours used for each score (default 5).
#
# Returns the column indices of user.package.matrix ordered by decreasing
# installation.probability().
most.probable.packages <- function(user, user.package.matrix, distances, k = 5)
{
  # seq_len() yields an empty sequence for a zero-column table, where
  # 1:ncol() would iterate over c(1, 0); vapply() pins the result type.
  scores <- vapply(seq_len(ncol(user.package.matrix)),
                   function(package)
                   {
                     installation.probability(user,
                                              package,
                                              user.package.matrix,
                                              distances,
                                              k = k)
                   },
                   numeric(1))
  order(scores, decreasing = TRUE)
}
 
# Rank all items for the first user and show the names of the top ten.
user <- 1
listing <- most.probable.packages(user, user.package.matrix, distances)
# head() caps at the available length, so fewer than ten items does not
# produce NA entries the way listing[1:10] would.
colnames(user.package.matrix)[head(listing, 10)]
 
 
# Euclidean distances between the rows of the `distances` matrix.
ex.dist <- dist(distances)
ex.dist

# Classical multidimensional scaling (2-D by default), drawn as text labels
# on an otherwise empty plot.
ex.mds <- cmdscale(ex.dist)
plot(ex.mds, type = 'n')
# The label vector was missing the commas between its elements, which is a
# parse error.
# NOTE(review): these labels look like user names, but `distances` is built
# from the columns (Beer) of user.package.matrix -- confirm that the labels
# match the plotted rows in both meaning and count.
text(ex.mds, c('1-sunmin', '2-hoyoung', '3-kiwon', '4-jaewon', '5-ilsub',
               '6-minhoo', '7-minsuk', '8-taewon', '9-minji', '10-sungmin'))
 
 
cs

installations.csv