请写出基于 DeepFM 模型、使用 MovieLens 的 ml-1m 数据集进行电影推荐的代码:根据用户输入的电影 title,再输出五个推荐结果,结果显示电影 ID、title 和 genres。
时间: 2024-05-05 21:22:16 浏览: 184
以下是基于DeepFM模型使用Movielens中ml-1m数据集进行电影推荐的示例代码:
```python
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from deepctr.models import DeepFM
from deepctr.inputs import SparseFeat, DenseFeat, get_feature_names
# Load the ratings and the movie catalogue.
# The ml-1m .dat files are Latin-1 encoded; reading movies.dat with the
# default UTF-8 codec fails on accented titles.
data = pd.read_csv('ml-1m/ratings.dat', header=None, sep='::', engine='python',
                   encoding='latin-1',
                   names=['user_id', 'movie_id', 'rating', 'timestamp'])
movies = pd.read_csv('ml-1m/movies.dat', header=None, sep='::', engine='python',
                     encoding='latin-1',
                     names=['movie_id', 'title', 'genres'])

# Attach title and genres to every rating row.
data = pd.merge(data, movies, on='movie_id')

# Human-readable catalogue of the movies that have at least one rating.
# It is captured BEFORE label encoding so that titles can still be looked up
# and printed later.
movie_features = ['movie_id', 'title', 'genres']
movie_catalog = data[movie_features].drop_duplicates('movie_id').reset_index(drop=True)

# Label-encode the sparse features.
# One fitted encoder is kept per feature: a single shared encoder would be
# overwritten on every iteration and end up fitted on the last feature only.
sparse_features = ['user_id', 'movie_id', 'title', 'genres']
encoders = {}
for feat in sparse_features:
    lbe = LabelEncoder()
    data[feat] = lbe.fit_transform(data[feat])
    encoders[feat] = lbe

# Encoded view of the catalogue; row i corresponds to movie_catalog.iloc[i].
encoded_catalog = {feat: encoders[feat].transform(movie_catalog[feat])
                   for feat in movie_features}

# Scale the dense features into [0, 1].
dense_features = ['timestamp']
mms = MinMaxScaler(feature_range=(0, 1))
data[dense_features] = mms.fit_transform(data[dense_features])

# Describe the model inputs to DeepFM.
fixlen_feature_columns = [SparseFeat(feat, vocabulary_size=data[feat].nunique(), embedding_dim=4)
                          for feat in sparse_features] + [DenseFeat(feat, 1, ) for feat in dense_features]
dnn_feature_columns = fixlen_feature_columns
linear_feature_columns = fixlen_feature_columns
feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns)

# Split into training and test sets.
train_data = data.sample(frac=0.8, random_state=2021)
test_data = data.drop(train_data.index)
train_model_input = {name: train_data[name] for name in feature_names}
test_model_input = {name: test_data[name] for name in feature_names}

# Train DeepFM as a rating regressor.
model = DeepFM(linear_feature_columns, dnn_feature_columns, task='regression')
model.compile("adam", "mse", metrics=['mse'], )
model.fit(train_model_input, train_data['rating'].values, batch_size=256, epochs=10, verbose=2, validation_split=0.2, )

# Report the error on the held-out test set.
test_pred = model.predict(test_model_input, batch_size=256).reshape(-1)
print('Test MSE:', float(np.mean((test_pred - test_data['rating'].values) ** 2)))


def recommend_movies(title, topk=5, max_users=50, min_rating=4):
    """Print the ``topk`` movies recommended for people who liked ``title``.

    The model predicts a rating for a (user, movie) pair, so a title-based
    query is answered as follows: take users who rated the query movie at
    least ``min_rating``, predict their ratings for every other rated movie
    in one batched call, average the predictions per movie and print the
    best ``topk`` as movie ID, title and genres.

    Args:
        title: Movie title as written in movies.dat, e.g. ``'Toy Story (1995)'``.
            Matching ignores case and surrounding whitespace.
        topk: Number of recommendations to print.
        max_users: Upper bound on the number of users whose predictions are
            averaged; keeps the prediction batch small.
        min_rating: Minimum rating that counts as "liked the movie".

    Raises:
        ValueError: If no rated movie has the given title.
    """
    # Look the title up in the raw catalogue; the ``title`` column of
    # ``data`` holds integer codes and cannot be compared with a string.
    wanted = title.strip().lower()
    matches = np.flatnonzero((movie_catalog['title'].str.lower() == wanted).values)
    if len(matches) == 0:
        raise ValueError('Movie title not found among rated movies: {!r}'.format(title))
    query_pos = matches[0]
    query_code = encoded_catalog['movie_id'][query_pos]

    # Users (encoded ids) who liked the query movie; fall back to everyone
    # who rated it when nobody reached ``min_rating``.
    rated = data[data['movie_id'] == query_code]
    fans = rated.loc[rated['rating'] >= min_rating, 'user_id'].unique()
    if len(fans) == 0:
        fans = rated['user_id'].unique()
    if len(fans) > max_users:
        # Fixed seed so repeated calls return the same recommendations.
        fans = np.random.RandomState(2021).choice(fans, size=max_users, replace=False)

    # Candidates are all rated movies except the query movie itself.
    candidate_pos = np.delete(np.arange(len(movie_catalog)), query_pos)
    n_users, n_movies = len(fans), len(candidate_pos)

    # Build the full user x movie grid once and score it in a single call.
    model_input = {'user_id': np.repeat(fans, n_movies)}
    for feat in movie_features:
        model_input[feat] = np.tile(encoded_catalog[feat][candidate_pos], n_users)
    # 1.0 is the newest timestamp after min-max scaling, i.e. "rated now".
    model_input['timestamp'] = np.ones(n_users * n_movies)

    scores = model.predict(model_input, batch_size=4096)
    mean_scores = scores.reshape(n_users, n_movies).mean(axis=0)

    best = np.argsort(-mean_scores)[:topk]
    for pos in candidate_pos[best]:
        row = movie_catalog.iloc[pos]
        print('Movie ID:', row['movie_id'])
        print('Title:', row['title'])
        print('Genres:', row['genres'])
```
用户可以调用 recommend_movies 函数,输入电影的 title,即可获得五个推荐结果。注意 title 需与 movies.dat 中的写法一致(包含上映年份),例如 `recommend_movies('Toy Story (1995)')`。
阅读全文