Java实现协同过滤算法
时间: 2023-11-18 07:55:43 浏览: 85
基于Hadoop的Java实现用户协同过滤算法源码
协同过滤算法是一种推荐算法,可以结合用户的行为历史和其他用户的行为相似性来为用户生成个性化推荐。下面是一个简单的Java实现协同过滤算法的示例:
首先,我们需要定义一个User类来表示用户,其中包含用户的ID和评分历史:
```java
/**
 * A user of the recommender, identified by a numeric id and carrying the
 * ratings that user has given, keyed by movie id.
 */
public class User {
    private int id;
    private Map<Integer, Double> scoresByMovie = new HashMap<>();

    /**
     * Creates a user that has not rated anything yet.
     *
     * @param id the user's identifier
     */
    public User(int id) {
        this.id = id;
    }

    /** @return this user's identifier */
    public int getId() {
        return this.id;
    }

    /**
     * Records this user's rating for a movie, replacing any earlier rating
     * stored for the same movie.
     *
     * @param movieId the rated movie
     * @param rating  the rating value
     */
    public void addRating(int movieId, double rating) {
        this.scoresByMovie.put(movieId, rating);
    }

    /**
     * Looks up this user's rating for a movie.
     *
     * @param movieId the movie to look up
     * @return the stored rating, or {@code null} if the user has not rated the movie
     */
    public Double getRating(int movieId) {
        return this.scoresByMovie.get(movieId);
    }

    /**
     * @return the ids of all movies this user has rated; this is the key-set
     *         view of the internal map, so it reflects later {@code addRating} calls
     */
    public Set<Integer> getRatedMovies() {
        return this.scoresByMovie.keySet();
    }
}
```
接下来,我们需要定义一个Movie类来表示电影,其中包含电影的ID和用户评分历史。
```java
/**
 * A movie in the recommender, identified by a numeric id and carrying the
 * ratings it has received, keyed by the id of the rating user.
 */
public class Movie {
    private int id;
    private Map<Integer, Double> scoresByUser = new HashMap<>();

    /**
     * Creates a movie that has not been rated yet.
     *
     * @param id the movie's identifier
     */
    public Movie(int id) {
        this.id = id;
    }

    /** @return this movie's identifier */
    public int getId() {
        return this.id;
    }

    /**
     * Records a user's rating for this movie, replacing any earlier rating
     * stored for the same user.
     *
     * @param userId the rating user
     * @param rating the rating value
     */
    public void addRating(int userId, double rating) {
        this.scoresByUser.put(userId, rating);
    }

    /**
     * Looks up the rating a user gave this movie.
     *
     * @param userId the user to look up
     * @return the stored rating, or {@code null} if that user has not rated this movie
     */
    public Double getRating(int userId) {
        return this.scoresByUser.get(userId);
    }

    /**
     * @return the ids of all users who rated this movie; this is the key-set
     *         view of the internal map, so it reflects later {@code addRating} calls
     */
    public Set<Integer> getRatedUsers() {
        return this.scoresByUser.keySet();
    }
}
```
然后,我们需要定义一个Rating类来表示用户对电影的评分。它包含用户ID、电影ID和评分值。
```java
/**
 * A single rating event: which user rated which movie, and with what value.
 */
public class Rating {
    private int rater;
    private int movie;
    private double score;

    /**
     * @param userId  id of the user who gave the rating
     * @param movieId id of the movie that was rated
     * @param rating  the rating value
     */
    public Rating(int userId, int movieId, double rating) {
        rater = userId;
        movie = movieId;
        score = rating;
    }

    /** @return id of the user who gave the rating */
    public int getUserId() {
        return rater;
    }

    /** @return id of the movie that was rated */
    public int getMovieId() {
        return movie;
    }

    /** @return the rating value */
    public double getRating() {
        return score;
    }
}
```
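接下来,我们需要实现协同过滤算法。我们可以使用皮尔逊相关系数来计算用户之间的相似度。对于一个用户,我们先计算他与其他用户的相似度,并从已对目标电影评过分的用户中选择最相似的K个用户。然后,以相似度为权重,对这K个用户的评分做加权平均,得到该用户对这部电影的预测评分:$\hat{r}_{u,i} = \frac{\sum_{v \in N_k(u)} s_{uv}\, r_{v,i}}{\sum_{v \in N_k(u)} s_{uv}}$,其中 $s_{uv}$ 是用户 $u$ 与 $v$ 的相似度,$r_{v,i}$ 是用户 $v$ 对电影 $i$ 的评分。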
```java
/**
 * User-based collaborative filtering.
 *
 * <p>A rating is predicted as the similarity-weighted average of the ratings
 * given to the target movie by the users most similar to the target user,
 * where similarity is the Pearson correlation over co-rated movies.
 *
 * <p>The three lists are held by reference and scanned on every lookup, so
 * changes the caller makes to them after construction are visible here.
 */
public class CF {
    private List<User> users;
    private List<Movie> movies;
    private List<Rating> ratings;

    /**
     * @param users   all known users, each carrying its own per-movie ratings
     * @param movies  all known movies, each carrying the ids of the users who rated it
     * @param ratings flat list of rating events; the source of the neighbour
     *                rating values used in the weighted average
     */
    public CF(List<User> users, List<Movie> movies, List<Rating> ratings) {
        this.users = users;
        this.movies = movies;
        this.ratings = ratings;
    }

    /**
     * Predicts the rating {@code userId} would give {@code movieId}.
     *
     * <p>Only neighbours that rated the movie and are positively correlated
     * with the target user take part. Keeping every weight positive makes the
     * result a true weighted average, so it always lies between the smallest
     * and largest neighbour rating.
     *
     * @param userId  id of the user to predict for
     * @param movieId id of the movie to predict
     * @param k       maximum number of neighbours to use
     * @return the predicted rating, or {@code 0} when no prediction can be made
     *         (unknown user or movie, no usable neighbour, or {@code k <= 0})
     */
    public double predictRating(int userId, int movieId, int k) {
        User user = getUserById(userId);
        Movie movie = getMovieById(movieId);
        if (user == null || movie == null) {
            // Nothing to base a prediction on; use the same sentinel as "no neighbours".
            return 0;
        }
        Set<Integer> raters = movie.getRatedUsers();

        Map<Integer, Double> similarities = new HashMap<>();
        for (User otherUser : users) {
            int otherUserId = otherUser.getId();
            if (otherUserId == userId || !raters.contains(otherUserId)) {
                continue;
            }
            double similarity = pearson(user, otherUser);
            // Negative or zero correlations are dropped: mixing signed weights
            // lets the denominator cancel out or change sign.
            if (similarity > 0) {
                similarities.put(otherUserId, similarity);
            }
        }

        // Max-heap on similarity so the most similar neighbours are polled first.
        PriorityQueue<Map.Entry<Integer, Double>> pq = new PriorityQueue<>(
                (a, b) -> Double.compare(b.getValue(), a.getValue()));
        for (Map.Entry<Integer, Double> entry : similarities.entrySet()) {
            pq.offer(entry);
        }

        double sum = 0;
        double weightSum = 0;
        int count = 0;
        while (!pq.isEmpty() && count < k) {
            Map.Entry<Integer, Double> entry = pq.poll();
            double similarity = entry.getValue();
            double rating = getRating(entry.getKey(), movieId);
            if (rating == 0) {
                // 0 is the "no rating recorded" sentinel of getRating; such a
                // neighbour does not count towards k.
                continue;
            }
            sum += similarity * rating;
            weightSum += similarity;
            count++;
        }
        if (count == 0) {
            return 0;
        }
        // weightSum > 0 here because every contributing similarity is > 0.
        return sum / weightSum;
    }

    /** Returns the first user with the given id, or {@code null} if there is none. */
    private User getUserById(int userId) {
        for (User user : users) {
            if (user.getId() == userId) {
                return user;
            }
        }
        return null;
    }

    /** Returns the first movie with the given id, or {@code null} if there is none. */
    private Movie getMovieById(int movieId) {
        for (Movie movie : movies) {
            if (movie.getId() == movieId) {
                return movie;
            }
        }
        return null;
    }

    /**
     * Returns the value of the first rating event matching the user and movie,
     * or {@code 0} if the rating list has no such event.
     */
    private double getRating(int userId, int movieId) {
        for (Rating rating : ratings) {
            if (rating.getUserId() == userId && rating.getMovieId() == movieId) {
                return rating.getRating();
            }
        }
        return 0;
    }

    /**
     * Pearson correlation between two users over the movies both have rated.
     *
     * @return a value in roughly [-1, 1]; {@code 0} when the users share no
     *         movie or when either user's co-rated ratings have no variance
     *         (the correlation is undefined in that case)
     */
    private double pearson(User u1, User u2) {
        Set<Integer> commonMovies = new HashSet<>(u1.getRatedMovies());
        commonMovies.retainAll(u2.getRatedMovies());
        if (commonMovies.isEmpty()) {
            return 0;
        }
        double sum1 = 0;
        double sum2 = 0;
        double sum1Sq = 0;
        double sum2Sq = 0;
        double pSum = 0;
        int n = commonMovies.size();
        for (int movieId : commonMovies) {
            double rating1 = u1.getRating(movieId);
            double rating2 = u2.getRating(movieId);
            sum1 += rating1;
            sum2 += rating2;
            sum1Sq += rating1 * rating1;
            sum2Sq += rating2 * rating2;
            pSum += rating1 * rating2;
        }
        double num = pSum - (sum1 * sum2 / n);
        double spread1 = sum1Sq - sum1 * sum1 / n;
        double spread2 = sum2Sq - sum2 * sum2 / n;
        // Constant ratings should give exactly 0 here, but rounding can leave a
        // tiny negative residue; sqrt of that would be NaN and poison the
        // ranking. The negated comparison also catches NaN inputs.
        if (!(spread1 > 0) || !(spread2 > 0)) {
            return 0;
        }
        return num / Math.sqrt(spread1 * spread2);
    }
}
```
最后,我们可以使用CF类来预测用户的评分。下面是一个示例:
```java
/**
 * Demo driver: builds a tiny data set, then prints the predicted rating of
 * user 1 for movie 2 using the 2 nearest neighbours.
 */
public class Main {
    public static void main(String[] args) {
        // Row i holds the ratings of user i+1; column j is movie j+1.
        // NOTE(review): users 1 and 2 carry a rating for movie 4, which has no
        // Movie object and no Rating entry below; kept as in the original data.
        double[][] ratingsByUser = {
            {5, 3, 4, 4},
            {3, 1, 2, 3},
            {4, 3, 4},
        };
        List<User> users = new ArrayList<>();
        for (int u = 0; u < ratingsByUser.length; u++) {
            User user = new User(u + 1);
            for (int m = 0; m < ratingsByUser[u].length; m++) {
                user.addRating(m + 1, ratingsByUser[u][m]);
            }
            users.add(user);
        }

        // Row j holds the ratings of movie j+1; column i is user i+1.
        // NOTE(review): movie 2 carries a rating from user 4, who is not in the
        // user list; kept as in the original data.
        double[][] ratingsByMovie = {
            {5, 3, 4},
            {3, 1, 2, 4},
            {4, 3, 4},
        };
        List<Movie> movies = new ArrayList<>();
        for (int m = 0; m < ratingsByMovie.length; m++) {
            Movie movie = new Movie(m + 1);
            for (int u = 0; u < ratingsByMovie[m].length; u++) {
                movie.addRating(u + 1, ratingsByMovie[m][u]);
            }
            movies.add(movie);
        }

        // Flat rating events, user-major, covering movies 1..3 for every user.
        int movieCount = ratingsByMovie.length;
        List<Rating> ratings = new ArrayList<>();
        for (int u = 0; u < ratingsByUser.length; u++) {
            for (int m = 0; m < movieCount; m++) {
                ratings.add(new Rating(u + 1, m + 1, ratingsByUser[u][m]));
            }
        }

        CF cf = new CF(users, movies, ratings);
        double predictedRating = cf.predictRating(1, 2, 2);
        System.out.println("Predicted rating: " + predictedRating);
    }
}
```
在这个示例中,我们创建了三个用户和三部电影,并添加了一些评分。然后,我们使用CF类来预测用户1对电影2的评分,并设置K为2,即使用最相似的两个用户的评分来进行预测。由于用户1实际上已经给电影2打过3分,可以将输出的预测值与这个真实评分进行对比,以直观检验算法的效果。
阅读全文