# [Web page notice, not part of the script] Code pull complete; the page will refresh automatically.
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics.pairwise import cosine_similarity
# Item-based collaborative filtering on the MovieLens 100k ratings file.
#
# Pipeline: load ratings -> 80/20 train/test split -> dense user-item matrix
# built from the training ratings -> item-item cosine similarity -> predict
# each held-out rating as a similarity-weighted average of the ratings the
# same user gave in the training set -> report mean squared error.

# Load data
# The file is tab-separated with no header row, so column names are set here.
data = pd.read_csv('ml-100k/u.data', sep='\t', header=None)
data.columns = ['user_id', 'item_id', 'rating', 'timestamp']

# Split data into train and test sets
train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)
# Take an explicit copy so that adding the prediction column below does not
# trigger pandas' chained-assignment warning on the split result.
test_data = test_data.copy()

# Create user-item matrix
# IDs are used directly as (id - 1) indices, so the matrix has to span the
# largest ID; a unique-value count would be too small if the IDs had gaps.
num_users = int(data['user_id'].max())
num_items = int(data['item_id'].max())
user_item_matrix = np.zeros((num_users, num_items))
train_user_idx = train_data['user_id'].to_numpy() - 1
train_item_idx = train_data['item_id'].to_numpy() - 1
user_item_matrix[train_user_idx, train_item_idx] = train_data['rating'].to_numpy()

# Calculate item-item similarity matrix
# Rows of the transposed matrix are items, so entry [i, j] compares the
# training-rating columns of item i and item j.
item_sim_matrix = cosine_similarity(user_item_matrix.T)

# Predict ratings for test set
# Used when a user has no training ratings at all; a prediction of 0 would
# lie outside the rating scale and inflate the error.
global_mean_rating = train_data['rating'].mean()

predictions = []
for row in test_data.itertuples(index=False):
    user_idx = row.user_id - 1
    item_idx = row.item_id - 1
    user_ratings = user_item_matrix[user_idx]
    # A zero entry means "not rated in the training set".
    rated_items = np.flatnonzero(user_ratings > 0)

    if rated_items.size == 0:
        predictions.append(global_mean_rating)
        continue

    weights = item_sim_matrix[item_idx, rated_items]
    weight_sum = weights.sum()
    if weight_sum > 0:
        predicted_rating = np.dot(user_ratings[rated_items], weights) / weight_sum
    else:
        # The target item shares no similarity with anything this user rated
        # (for example, it has no training ratings), so the weighted average
        # would be 0/0. Fall back to the user's own mean training rating.
        predicted_rating = user_ratings[rated_items].mean()
    predictions.append(predicted_rating)

# Assign the whole column at once so it is created with a float dtype.
test_data['predicted_rating'] = predictions

# Evaluate performance
mse = np.mean((test_data['rating'] - test_data['predicted_rating'])**2)
print('Mean squared error:', mse)
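# [Web page notice, not part of the script] This section may contain content unsuitable for display, so the page does not show it. You can review and modify it using the relevant editing functions.
# [Web page notice, not part of the script] If you confirm the content involves no inappropriate language, pure advertising, violence, vulgarity or pornography, infringement, piracy, false or worthless content, or anything violating national laws and regulations, you can click submit to appeal and it will be handled as soon as possible.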