Skip to content

Commit 880b2f8

Browse files
committed
First Version
0 parents  commit 880b2f8

File tree

4 files changed

+102
-0
lines changed

4 files changed

+102
-0
lines changed

jester-data-1.xls

15.3 MB
Binary file not shown.

jester-data-1500.xls

467 KB
Binary file not shown.

main.py

+101
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
__author__ = 'Febrian Imanda Effendy'
2+
3+
import xlrd
4+
import numpy as np
5+
import math
6+
7+
# sh = data.sheet_by_index(0)
8+
# print sh.name, sh.nrows, sh.ncols
9+
# for rx in range(sh.nrows):
10+
# print sh.row(rx)
11+
def getData(filename):
    """Open the Excel workbook at *filename* and return its first sheet."""
    workbook = xlrd.open_workbook(filename)
    return workbook.sheet_by_index(0)
15+
16+
# Worksheet with the ratings, loaded once when the module is imported.
# Rows are indexed by user and columns by item (see getRating).
DATA = getData("jester-data-1500.xls")
# Sheet dimensions, cached for the loops in the functions below.
SHEET_ROWS, SHEET_COLUMN = DATA.nrows, DATA.ncols
19+
20+
def getRating(user, item):
    """Return the value of the cell at row *user*, column *item* of DATA."""
    cells = DATA.row(user)
    return cells[item].value
23+
24+
def getItemRating(user):
    """Return all item ratings of *user* as a NumPy array.

    Column 0 is skipped; the ratings are read from columns 1 up to
    SHEET_COLUMN - 1, in column order.
    """
    return np.array(
        [getRating(user, column) for column in range(1, SHEET_COLUMN)]
    )
32+
33+
def getAverageRating(rates):
    """Return the mean of *rates* after zeroing out sentinel values.

    Every value of 99 or more (apparently the dataset's "not rated"
    marker) is replaced by 0 before averaging, so such entries still
    count towards the number of elements the mean is taken over.

    rates -- iterable of numeric ratings (e.g. a NumPy array).
    """
    cleaned = [0 if rate >= 99 else rate for rate in rates]
    return np.mean(np.array(cleaned))
41+
42+
def getNeighbours(user):
    """Return the users that share at least one rated item with *user*.

    A rating counts as present when its value is below 99. Only the item
    columns (1 .. SHEET_COLUMN - 1) are inspected, matching getItemRating;
    column 0 is not an item rating and must not make two users neighbours.

    user -- row index of the target user.
    Returns a NumPy array with the row indices of all neighbours, in
    ascending order; *user* itself is never included.
    """
    # Read the target user's row once: only items this user actually
    # rated can ever produce a match.
    ratedItems = [
        item for item in range(1, SHEET_COLUMN) if getRating(user, item) < 99
    ]
    neighbour = []
    for other in range(SHEET_ROWS):
        if other == user:
            continue
        # any() stops at the first co-rated item, like the original break.
        if any(getRating(other, item) < 99 for item in ratedItems):
            neighbour.append(other)
    return np.array(neighbour)
56+
57+
def getSimiliarity(user1, user2):
    """Return the Pearson-style similarity between two users.

    The similarity is

        sum(d1 * d2) / sqrt(sum(d1 ** 2) * sum(d2 ** 2))

    where d1 and d2 are each user's deviation from their own average
    rating (as computed by getAverageRating). The sums run over the
    items both users have rated (value below 99); the 99 sentinel is
    never treated as a real rating.

    user1, user2 -- row indices of the users to compare.
    Returns 0.0 when the denominator is zero (no co-rated items, or one
    user has no deviation on the co-rated items), since no correlation
    can be measured in that case.
    """
    ratings1 = getItemRating(user1)
    ratings2 = getItemRating(user2)
    yAvgUser1 = getAverageRating(ratings1)
    yAvgUser2 = getAverageRating(ratings2)

    atas = 0.0      # numerator: sum of deviation products
    sumSq1 = 0.0    # sum of squared deviations of user1
    sumSq2 = 0.0    # sum of squared deviations of user2
    for yUser1, yUser2 in zip(ratings1, ratings2):
        if yUser1 >= 99 or yUser2 >= 99:
            continue  # at least one of the two users did not rate this item
        dev1 = yUser1 - yAvgUser1
        dev2 = yUser2 - yAvgUser2
        atas += dev1 * dev2
        # The squared deviations must use the per-item ratings; they are
        # accumulated in the same pass so no stale value can be reused.
        sumSq1 += dev1 ** 2
        sumSq2 += dev2 ** 2

    bawah = math.sqrt(sumSq1 * sumSq2)  # denominator
    if bawah == 0:
        return 0.0
    return atas / bawah
75+
76+
def getTopSimiliarity(listSim):
    """Return up to 20 of the largest similarities, in descending order.

    The values are sorted with NumPy's merge sort (O(n log n)), reversed,
    and positions 1 through 20 of the result are returned.

    NOTE(review): position 0, the single largest value, is left out --
    presumably the list is expected to contain the user's similarity
    with itself; confirm against the caller.
    """
    descending = np.sort(listSim, kind='mergesort')[::-1]
    return descending[1:21]
81+
82+
def getPredictedRating(user, item):
    """Predict the rating *user* would give to *item*.

    The prediction is the user's own average rating plus the
    similarity-weighted mean of each neighbour's deviation from that
    neighbour's average:

        avg(user) + sum(sim * (rating - avg(neighbour))) / sum(|sim|)

    A neighbour rating of 99 or more is counted as 0. One progress line
    is printed per neighbour.

    user -- row index of the target user.
    item -- column index of the item to predict.
    Returns the user's average rating unchanged when there are no
    neighbours or all similarities are zero, instead of dividing by zero.
    """
    yAvgUser = getAverageRating(getItemRating(user))
    atas = 0.0   # numerator: weighted sum of neighbour deviations
    bawah = 0.0  # denominator: sum of absolute similarities
    for neighbour in getNeighbours(user):
        similiarities = getSimiliarity(neighbour, user)
        tempRating = getRating(neighbour, item)
        # NOTE(review): a neighbour who did not rate the item still
        # contributes with a rating of 0 -- confirm this is intended.
        rating = 0 if tempRating >= 99 else tempRating
        yAvgNeighbour = getAverageRating(getItemRating(neighbour))
        # A single pre-formatted string prints identically on Python 2 and 3.
        print("User %s  | User %s  - Similiarities : %s  - rating : %s"
              "  - avg : %s"
              % (user, neighbour, similiarities, rating, yAvgNeighbour))
        atas += similiarities * (rating - yAvgNeighbour)
        bawah += abs(similiarities)
    if bawah == 0:
        # No usable neighbour information: fall back to the user's mean.
        return yAvgUser
    return yAvgUser + (atas / bawah)
99+
100+
# print getAverageRating(getItemRating(0))
101+
# Script entry: predict and print the rating of user 0 for item 100.
print(getPredictedRating(0, 100))

out.txt

+1
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)