本文整理汇总了Python中recsys.datamodel.data.Data类的典型用法代码示例。如果您正苦于以下问题:Python Data类的具体用法?Python Data怎么用?Python Data使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了Data类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: _convert_hash
def _convert_hash(self, dataset):
    """Flatten a two-level mapping into a ``Data`` object.

    ``dataset`` is read as ``{outer_key: {inner_key: value}}``. Every entry
    becomes one ``(value, outer_key, inner_key)`` tuple, and the tuples of
    each outer key are appended to the result as one batch.

    Returns the populated ``Data`` instance.
    """
    converted = Data()
    for outer_key in dataset:
        inner = dataset[outer_key]
        triples = []
        for inner_key in inner:
            triples.append((inner[inner_key], outer_key, inner_key))
        # extend=True so each batch is added to what is already stored.
        converted.set(triples, extend=True)
    return converted
开发者ID:norbert,项目名称:fickle,代码行数:7,代码来源:recommenders.py
示例2: train_and_save
def train_and_save(filename):
    """Train and save an SVD model for ``filename`` unless one can be loaded.

    The text after the last ``.`` in ``filename`` is used as a ``step``
    suffix. If ``SVD('svdn_model_<step>.zip')`` succeeds, nothing is trained.
    Otherwise an SVD is computed on the training part of an 80-percent split
    and ``svdn_model_<step>`` is passed to ``compute`` as ``savefile``.

    Args:
        filename: path to a ``::``-separated file, loaded with the column
            layout ``{'col': 1, 'row': 0, 'value': 2, 'ids': 'str'}``.
    """
    step = filename.split('.')[-1]
    model_name = 'svdn_model_{step}'.format(step=step)

    ratings = Data()
    # Named ``layout`` rather than ``format`` so the builtin is not shadowed.
    layout = {'col': 1, 'row': 0, 'value': 2, 'ids': 'str'}
    ratings.load(filename, sep='::', format=layout)
    # The held-out part of the split is not used by this function.
    train, _ = ratings.split_train_test(percent=80)

    try:
        svd = SVD('{name}.zip'.format(name=model_name))
    except Exception:
        # Was a bare ``except:``, which also swallowed KeyboardInterrupt and
        # SystemExit. The exact exception raised for a missing model is not
        # visible here, so the handler stays broad on purpose.
        svd = SVD()
        svd.set_data(train)
        svd.compute(
            k=100,
            min_values=2,
            pre_normalize=False,
            mean_center=True,
            post_normalize=True,
            savefile=model_name
        )
        print('Saved {name}.zip'.format(name=model_name))
    else:
        print('Already exists: {name}.zip'.format(name=model_name))
开发者ID:Chuchu2OP,项目名称:amazon-recsys,代码行数:31,代码来源:train.py
示例3: ex1
def ex1(dat_file='./ml-1m/ratings.dat',
        pct_train=0.5):
    """Train an SVD on part of a ratings file and print RMSE and MAE.

    Args:
        dat_file: path to a ``::``-separated ratings file.
        pct_train: forwarded unchanged as ``percent`` to
            ``Data.split_train_test``.
            NOTE(review): if ``percent`` is on a 0-100 scale, the default
            0.5 trains on half a percent of the data -- confirm against the
            recsys API before relying on the default.

    Returns:
        None. The two error measures are printed.
    """
    ratings = Data()
    ratings.load(dat_file, sep='::',
                 format={'col': 0, 'row': 1, 'value': 2, 'ids': int})

    # create train/test split
    train, test = ratings.split_train_test(percent=pct_train)

    # create svd
    K = 100
    svd = SVD()
    svd.set_data(train)
    svd.compute(k=K, min_values=5, pre_normalize=None,
                mean_center=True, post_normalize=True)

    # evaluate performance
    rmse = RMSE()
    mae = MAE()
    for rating, item_id, user_id in test.get():
        try:
            pred_rating = svd.predict(item_id, user_id)
        except KeyError:
            # Presumably the item or user is absent from the trained model
            # (TODO confirm); such test rows are skipped.
            continue
        else:
            rmse.add(rating, pred_rating)
            mae.add(rating, pred_rating)

    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print('RMSE=%s' % rmse.compute())
    print('MAE=%s' % mae.compute())
开发者ID:barnettjacob,项目名称:ga_ds,代码行数:29,代码来源:Lec14_RecommendationSystem.py
示例4: Algorithm
class Algorithm(object):
    """Base class that owns the data set a concrete algorithm works on."""

    def __init__(self):
        self._data = Data()

    def __repr__(self):
        """Return the row count, plus the first row when there is one."""
        data = self.get_data()
        summary = '%d rows.' % len(data)
        if len(data):
            summary += '\nE.g: %s' % str(data[0])
        return summary

    def __len__(self):
        """Return the number of rows in the current data set."""
        return len(self.get_data())

    def get_data(self):
        """Return the data set object currently held."""
        return self._data

    def set_data(self, data):
        """Replace the data set object currently held with ``data``."""
        self._data = data

    def add_tuple(self, tuple):
        """Forward ``tuple`` to the data set's own ``add_tuple``."""
        self.get_data().add_tuple(tuple)

    def load_data(self, filename, sep='\t', format=None):
        """Load ``filename`` into the data set via its ``load_file`` method.

        Args:
            filename: file to read.
            sep: field separator, tab by default.
            format: column layout. When omitted, a fresh
                ``{'value': 0, 'row': 1, 'col': 2}`` is used.
        """
        # The default used to be a dict literal in the signature, i.e. one
        # object shared by every call; build it per call instead.
        if format is None:
            format = {'value': 0, 'row': 1, 'col': 2}
        self._data.load_file(filename, sep, format)

    def compute(self):
        """Check that data is present.

        Raises:
            ValueError: if the data set's ``get()`` result is empty.
        """
        if not self._data.get():
            raise ValueError('No data set. Matrix is empty!')
开发者ID:luosha865,项目名称:recsysPlatform,代码行数:28,代码来源:baseclass.py
示例5: get_preference
def get_preference(user_List):
    """Fetch each user's owned games from the Steam Web API and save ratings.

    For every game with a positive ``playtime_forever`` one tuple
    ``(log10(playtime), appid, user_index)`` is added to a ``Data`` object,
    which is written to ``rating.dat`` at the end.

    Args:
        user_List: iterable of Steam ids. Each id is converted with ``str``
            before being placed in the request URL.

    Returns:
        None.
        NOTE(review): ``preference_dict`` (natural-log playtimes per user)
        and ``user_map`` are built but never returned or stored -- confirm
        whether callers were meant to receive them.
    """
    # NOTE(review): the Web API key is hard-coded in the URL. It is kept so
    # behaviour is unchanged, but it should come from configuration and the
    # published key should be rotated.
    url_prefix = ("http://api.steampowered.com/IPlayerService/GetOwnedGames/v0001/?"
                  "key=147CBF377C6B648EC3DC73499CE73D32&steamid=")

    preference_dict = {}
    user_map = {}
    data = Data()  # saving rating data
    i = 1
    for user in user_List:
        # str(user) was already computed originally but the raw value was
        # concatenated, which fails for non-string ids.
        url = url_prefix + str(user) + "&format=json"
        response = urllib2.urlopen(url)
        try:
            payload = response.read()
        finally:
            # Release the connection even when the read fails.
            response.close()
        owned_gameData = json.loads(payload.decode('utf-8-sig'))

        user_Pref = {}
        try:
            if owned_gameData['response']['game_count'] != 0:
                for games in owned_gameData['response']['games']:
                    playtime = games['playtime_forever']
                    if playtime > 0:
                        user_Pref[games['appid']] = math.log(playtime)
                        data.add_tuple((math.log(playtime, 10), games['appid'], i))
                        user_map[i] = user
        except Exception:
            # Best-effort: a response without the expected keys skips the
            # user without consuming an index. Was a bare ``except:``, which
            # also trapped KeyboardInterrupt/SystemExit.
            continue
        i = i + 1
        preference_dict[user] = user_Pref
    data.save('rating.dat')
开发者ID:Sapphirine,项目名称:Game-recommendation-on-Steam,代码行数:28,代码来源:game.py
示例6: test_data_extend
def test_data_extend():
    """``set(..., extend=True)`` must grow the data set, not replace it."""
    first_batch = [(1, 2, 3), (4, 5, 6)]
    second_batch = [(7, 8, 9), (10, 11, 12)]

    store = Data()
    store.set(first_batch)
    assert_equal(len(store), 2)

    # After extending, both batches are expected to be present.
    store.set(second_batch, extend=True)
    assert_equal(len(store), 4)
开发者ID:1060460048,项目名称:python-recsys,代码行数:9,代码来源:test_datamodel.py
示例7: load_ratings
def load_ratings(filename):
    """Read a comma-separated ratings file into a new ``Data`` object.

    The file is loaded with the column layout
    ``{'col': 0, 'row': 1, 'value': 2, 'ids': 'int'}``.

    Returns the loaded ``Data`` instance.
    """
    ratings = Data()
    ratings.load(
        filename,
        sep=',',
        format={'col': 0, 'row': 1, 'value': 2, 'ids': 'int'}
    )
    return ratings
开发者ID:srikanth3569,项目名称:movie,代码行数:9,代码来源:utils.py
示例8: build_model
def build_model(self, uids, kn):
    """Fit an SVD on user/song pairs and keep it on ``self.model``.

    Args:
        uids: mapping of user id to an iterable of songs. Every pair is
            stored as the tuple ``(1, song, uid)``.
        kn: passed to ``SVD.compute`` as ``k``.
    """
    plays = Data()
    for user_id, user_songs in uids.items():
        for song_id in user_songs:
            plays.add_tuple((1, song_id, user_id))

    model = SVD()
    model.set_data(plays)
    model.compute(k=kn, min_values=1)
    # Only published once compute() has finished.
    self.model = model
开发者ID:micolin,项目名称:thesis,代码行数:9,代码来源:svd.py
示例9: getAverageRating
def getAverageRating(ITEMID):
    """Print the mean rating of ``ITEMID`` in the MovieLens ratings file.

    Reads ``./data/movielens/ratings.dat`` (``::``-separated) and averages
    the values of all rows whose item id equals ``ITEMID``.

    Args:
        ITEMID: item id to look up, compared with ``==`` against loaded ids.

    Returns:
        None. The average, or a notice when the item has no ratings, is
        printed.
    """
    data = Data()
    data.load('./data/movielens/ratings.dat', sep='::',
              format={'col': 0, 'row': 1, 'value': 2, 'ids': int})

    matching = [rating for rating, item_id, user_id in data.get()
                if item_id == ITEMID]
    if not matching:
        # Previously this case divided by zero.
        print('No ratings found for item %s' % (ITEMID,))
        return
    # float() keeps Python 2 from truncating when the ratings are integers.
    print(float(sum(matching)) / len(matching))
开发者ID:udaysagar2177,项目名称:predictMovieRatings,代码行数:10,代码来源:main.py
示例10: calculate_stats_features
def calculate_stats_features(pct_train):
    """Split ``feature_matrix.csv`` and compute an SVD on the training part.

    Args:
        pct_train: forwarded as ``percent`` to ``Data.split_train_test``.

    Returns:
        Tuple ``(svd, train, test)``: the computed model and both splits.
    """
    features = Data()
    features.load('feature_matrix.csv', sep=',',
                  format={'col': 0, 'row': 1, 'value': 2, 'ids': int})
    train, test = features.split_train_test(percent=pct_train)

    model = SVD()
    model.set_data(train)
    # No filtering, centering or normalisation is requested here.
    model.compute(k=100, min_values=0, pre_normalize=None,
                  mean_center=False, post_normalize=False)
    return model, train, test
开发者ID:setman85,项目名称:Rest_Recs,代码行数:11,代码来源:rec2.py
示例11: get_data_model_matrix
def get_data_model_matrix(data):
    """
    Convert nested ``{user: {movie: rating}}`` data into a recsys ``Data``.

    Each rating is added as the tuple ``(rating, user, movie)``, i.e. in
    <value/row/column> order.

    :param data: mapping of user to a mapping of movie to rating
    :return: the populated ``Data`` object
    """
    matrix = Data()
    for user_id, reviews in data.items():
        for movie_id, rating in reviews.items():
            matrix.add_tuple((rating, user_id, movie_id))
    return matrix
开发者ID:LaPetiteSouris,项目名称:Collective_Intelligence,代码行数:11,代码来源:modellingdata.py
示例12: get_friend_matrix
def get_friend_matrix(u_ids, raw_data):
    """Build a ``Data`` object from the entries of the given users.

    Args:
        u_ids: iterable of user ids, each used as a key into ``raw_data``.
        raw_data: mapping ``user id -> {item id: (rate, ts)}``.

    Returns:
        ``Data`` holding one ``(float(rate), user_pos, item_pos)`` tuple per
        entry, with both positions counted from 1.
    """
    matrix = Data()
    for user_pos, u_id in enumerate(u_ids, 1):
        per_user = raw_data[u_id]
        # NOTE(review): item_pos restarts at 1 for every user, so the same
        # number can stand for different items across users -- confirm that
        # this is intended.
        for item_pos, i_id in enumerate(per_user.keys(), 1):
            rate, _ = per_user[i_id]
            matrix.add_tuple((float(rate), user_pos, item_pos))
    return matrix
开发者ID:wanghs09,项目名称:Experiments,代码行数:13,代码来源:tmp_svd.py
示例13: prepare_data
def prepare_data(raw_data):
    """Turn ``{user id: {item id: (rate, _)}}`` into a ``Data`` object.

    Users and each user's items are numbered from 1 in iteration order, and
    every entry is stored as ``(float(rate), user_number, item_number)``.

    Returns the populated ``Data`` instance.
    """
    matrix = Data()
    for user_number, u_id in enumerate(raw_data.keys(), 1):
        entries = raw_data[u_id]
        # NOTE(review): item numbering restarts for every user, so it is a
        # per-user position rather than a global item index -- confirm.
        for item_number, i_id in enumerate(entries.keys(), 1):
            rate, _ = entries[i_id]
            matrix.add_tuple((float(rate), user_number, item_number))
    return matrix
开发者ID:wanghs09,项目名称:Experiments,代码行数:13,代码来源:svd_tmp.py
示例14: setup_svd
def setup_svd(self, vote_list):
    """Build the SVD from ``vote_list`` when ``self.svd`` is ``None``.

    Args:
        vote_list: iterable of votes. ``vote[0].id`` is used as the user
            id, ``vote[1]`` as the item id and ``float(vote[2])`` as the
            value.

    Returns:
        ``self.svd`` (presumably backed by ``self.cache['svd']`` -- confirm
        on the enclosing class).
    """
    if self.svd is not None:
        return self.svd

    self.cache['svd'] = SVD()
    votes = Data()
    for vote in vote_list:
        user_id, item_id = vote[0].id, vote[1]
        # Tuple format is: <value, row, column>
        votes.add_tuple((float(vote[2]), item_id, user_id))
    self.cache['svd'].set_data(votes)
    self.cache['svd'].compute(k=self.k, min_values=1)
    return self.svd
开发者ID:AlinaKay,项目名称:django-recommends,代码行数:13,代码来源:pyrecsys.py
示例15: setUp
def setUp(self):
    """Prepare ``self.ratings`` and ``self.movies`` from the fixtures.

    ``ratings`` and ``movie_genres`` are names from the enclosing module.
    Ratings are copied into a ``Data`` object. Each movie becomes an
    ``Item`` carrying its name and genres, stored under its id.
    """
    rating_data = Data()
    for row in ratings:
        stars, item_id, user_id = row
        rating_data.add_tuple((stars, item_id, user_id))

    catalogue = {}
    for mid, name, genres in movie_genres:
        entry = Item(mid)
        entry.add_data({'name': name, 'genres': genres})
        catalogue[mid] = entry

    self.ratings = rating_data
    self.movies = catalogue
开发者ID:zermelozf,项目名称:clrec,代码行数:14,代码来源:test.py
示例16: test_save_n_load
def test_save_n_load(percent_train,
                     modelKlass=SVD,
                     dataFname='/Users/jennyyuejin/recommender/Data/movieData/u.data',
                     dataFormat=None):
    """Evaluate a freshly trained model, then a copy reloaded from disk.

    The model is trained on the training split, evaluated, and saved to
    ``./model/svd.obj.zip``. The data is then reloaded and split the same
    way without shuffling, the model is loaded back into a new instance,
    and that instance is evaluated on the reloaded test split.

    Args:
        percent_train: forwarded as ``percent`` to ``split_train_test``.
        modelKlass: model class, instantiated with no arguments.
        dataFname: tab-separated data file.
        dataFormat: column layout for ``Data.load``. Defaults to
            ``{'col': 0, 'row': 1, 'value': 2, 'ids': int}``.
    """
    # Built per call: the default used to be one dict literal shared by
    # every invocation.
    if dataFormat is None:
        dataFormat = {'col': 0, 'row': 1, 'value': 2, 'ids': int}

    model_path = './model/svd.obj.zip'
    K = 100
    # Defined once so compute() and save_model() cannot drift apart.
    svd_options = {'k': K, 'min_values': 5,
                   'pre_normalize': None, 'mean_center': True,
                   'post_normalize': True}

    data = Data()
    data.load(dataFname, sep='\t', format=dataFormat)

    print('------ evaluating original')
    train, test = data.split_train_test(percent=percent_train, shuffle_data=False)
    # One pre-formatted string prints identically on Python 2 and 3.
    print('%s training data points; %s testing data points' % (len(train), len(test)))

    # Create SVD
    svd = modelKlass()
    svd.set_data(train)
    svd.compute(**svd_options)
    evaluate(svd, test)

    svd.save_model(model_path, dict(svd_options))

    print('------ evaluating copy')
    data2 = Data()
    data2.load(dataFname, sep='\t', format=dataFormat)
    _, test2 = data2.split_train_test(percent=percent_train, shuffle_data=False)  # reload data
    print('%s testing data points' % len(test2))

    svd_pred = modelKlass()
    svd_pred.load_model(model_path)
    evaluate(svd_pred, test2)
开发者ID:jennyyuejin,项目名称:recommender,代码行数:34,代码来源:save_load_test.py
示例17: read_user_data_from_ratings
def read_user_data_from_ratings(data_file):
    """Group the ratings in ``data_file`` into one ``User`` per user id.

    Args:
        data_file: path to a ``::``-separated ratings file, loaded with
            the column layout
            ``{'col': 0, 'row': 1, 'value': 2, 'ids': 'int'}``.

    Returns:
        dict mapping each id found at tuple position 2 to a ``User`` that
        received ``add_item(tuple[1], tuple[0])`` for each of its rows.
    """
    data = Data()
    layout = {'col': 0, 'row': 1, 'value': 2, 'ids': 'int'}
    # The original passed ``dat_file``, a name that is not defined in this
    # function, so the ``data_file`` argument was never used.
    data.load(data_file, sep='::', format=layout)

    userdict = {}
    for d in data.get():
        user_id = d[2]
        if user_id in userdict:
            user = userdict[user_id]
        else:
            user = User(user_id)
            userdict[user_id] = user
        user.add_item(d[1], d[0])
    return userdict
开发者ID:cristianvirtual,项目名称:aiw-second-edition,代码行数:15,代码来源:ch3.py
示例18: update
def update(self, USER_ID, baseline, path, pred_items):
    """Fold newly recommended items into a user's stored occurrence data.

    The user's total count is raised by ``len(pred_items)``. Each stored
    occurrence of the user is replaced by its stored value divided by the
    new total, and every recommended item is added with weight
    ``1 / new total``. The count dict, the occurrence dict, the tweet
    occurrence data and the sparse matrix are written back under ``path``.

    Args:
        USER_ID: user whose entries are updated.
        baseline: model object exposing ``get_data``, ``set_data``,
            ``save_data`` and ``_matrix``.
        path: prefix prepended as-is to every pickle file name.
        pred_items: sequence of ``(item_id, relevance)`` pairs. Only the
            item id and the number of pairs are used.
    """
    # NOTE(review): unpickling runs arbitrary code from the file; this is
    # only safe while everything under ``path`` is trusted.
    def _load(name):
        # Binary mode plus a context manager: the original opened these
        # files in text mode and never closed them.
        with open(path + name, "rb") as handle:
            return cPickle.load(handle)

    def _dump(obj, name):
        # Protocol 2 is binary, so the file must be opened "wb". The sparse
        # matrix was previously written through a text-mode "w" handle.
        with open(path + name, "wb") as handle:
            cPickle.dump(obj, handle, 2)

    print("Loading tweet occurrences pickle...")
    baseline.get_data()._load_pickle(path=path + "tweet_occurrences.p")
    tweet_occurrences = baseline.get_data().get()

    print("Loading count_dict pickle...")
    count_dict = _load("count_dict.p")

    print("Loading occurrences pickle...")
    occurrences = _load("occurrences.p")

    upd_total_count = int(count_dict[USER_ID]) + len(pred_items)
    count_dict[USER_ID] = upd_total_count

    print("Dumping count_dict pickle...")
    _dump(count_dict, "count_dict.p")

    print("Updating counts for known artists...")
    for index, (_, item_id, user_id) in enumerate(tweet_occurrences):
        if str(user_id).encode('utf-8') != USER_ID:
            continue
        item_id = str(item_id).encode('utf-8')
        # The stored occurrence value is used, not the tuple's own count.
        upd_count = float(occurrences[(item_id, USER_ID)]) / float(upd_total_count)
        occurrences[(item_id, USER_ID)] = upd_count
        baseline._matrix.set_value(item_id, USER_ID, upd_count)
        # Replacing an existing slot is safe while enumerating the list.
        tweet_occurrences[index] = (upd_count, item_id, user_id)

    print("Updating counts for recommended artists...")
    for item_id, _relevance in pred_items:
        count = 1.0 / float(upd_total_count)
        baseline._matrix.set_value(item_id, USER_ID, count)
        occurrences[(item_id, USER_ID)] = count
        tweet_occurrences.append((count, item_id, USER_ID))

    print("Dumping tweet occurrences pickle...")
    data_tweet_occurrences = Data()
    data_tweet_occurrences.set(tweet_occurrences)
    baseline.set_data(data_tweet_occurrences)
    baseline.save_data(filename=path + "tweet_occurrences.p", pickle=True)

    print("Dumping occurrence pickle...")
    _dump(occurrences, "occurrences.p")

    print("Dumping sparse matrix pickle...")
    _dump(baseline._matrix.get(), "sparse_matrix.p")
开发者ID:dnarwani,项目名称:twitter-rec,代码行数:48,代码来源:functions.py
示例19: test_utf8_data
def test_utf8_data():
    """Saving and reloading data with non-ASCII item ids keeps its length."""
    saved_path = os.path.join(MOVIELENS_DATA_PATH, 'ratings.matrix.saved.utf8')

    data_in = Data()
    # The item id is written once as a unicode literal and once as a plain
    # string literal.
    rows = (
        (69, u'Bj\xf6rk', USERID1),
        (34, 'Björk', USERID2),
    )
    for num_plays, item_id, user_id in rows:
        data_in.add_tuple([num_plays, item_id, user_id])
    data_in.save(saved_path)

    data_saved = Data()
    data_saved.load(saved_path)

    assert_equal(len(data_in), len(data_saved))
开发者ID:1060460048,项目名称:python-recsys,代码行数:17,代码来源:test_algorithm.py
示例20: build_svd_item_based
def build_svd_item_based(user_op_item_cnt, item_op_users, user_idx, item_idx, min_nonzero):
    """Collect filtered user/item counts into an SVD's data set.

    A user is skipped when it has fewer than ``min_nonzero`` entries. An
    entry is skipped when ``item_op_users[item] < min_nonzero`` or when its
    count is below 1. Every kept entry is stored as
    ``(float count, item_idx[item], user_idx[user])``.

    Returns:
        Tuple ``(svd, item_lst)``: an ``SVD`` with the data set (``compute``
        is not called here) and the kept item keys without duplicates.
    """
    svd = SVD()
    data = Data()
    kept_items = []
    for ui in user_op_item_cnt:
        per_user = user_op_item_cnt[ui]
        if len(per_user) < min_nonzero:
            continue
        for ti in per_user:
            # Both skip conditions, checked in their original order.
            if item_op_users[ti] < min_nonzero or 1.0 * per_user[ti] < 1:
                continue
            kept_items.append(ti)
            data.add_tuple((1.0 * per_user[ti], item_idx[ti], user_idx[ui]))
    svd.set_data(data)
    return svd, list(set(kept_items))
开发者ID:billionprince,项目名称:round3,代码行数:17,代码来源:SVD_multi_model_based_recommendation.py
注:本文中的recsys.datamodel.data.Data类示例由纯净天空整理自GitHub/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论