基于内容的推荐算法 Java 代码
时间: 2023-07-29 16:09:26 浏览: 55
基于内容的推荐算法可以先用 TF-IDF(词频-逆文档频率)把每个文档表示为加权词向量,再用余弦相似度比较这些向量,从而找出内容相近的文档。这里提供一个简单的 Java 代码实现:
```java
import java.util.*;
/**
 * Minimal content-based recommendation demo.
 *
 * <p>Each document is a list of word tokens. A document is turned into a sparse
 * TF-IDF vector (word -> weight) and two documents are compared with cosine
 * similarity over those vectors.
 *
 * <p>All methods return finite values: degenerate inputs (empty document, empty
 * corpus, word absent from the corpus, all-zero vector) yield {@code 0.0} instead
 * of {@code NaN} or {@code Infinity}.
 */
public class ContentBasedRecommendation {

    /**
     * Computes the TF-IDF weight of every distinct word in a document.
     *
     * @param document the tokens of the document to weigh
     * @param corpus   all documents used to derive the inverse document frequency
     * @return a map from each distinct word in {@code document} to its TF-IDF weight;
     *         empty when {@code document} is empty
     */
    public static Map<String, Double> calculateTFIDF(List<String> document, List<List<String>> corpus) {
        // Count every word once up front instead of rescanning the whole document
        // for each occurrence of each word.
        Map<String, Integer> occurrences = new HashMap<>();
        for (String word : document) {
            occurrences.merge(word, 1, Integer::sum);
        }

        Map<String, Double> tfidfMap = new HashMap<>();
        for (Map.Entry<String, Integer> entry : occurrences.entrySet()) {
            // The map is only populated when the document is non-empty, so size() > 0 here.
            double tf = (double) entry.getValue() / document.size();
            double idf = calculateIDF(entry.getKey(), corpus);
            tfidfMap.put(entry.getKey(), tf * idf);
        }
        return tfidfMap;
    }

    /**
     * Computes the term frequency of a word: its occurrences divided by the
     * document length.
     *
     * @param word     the word to look up
     * @param document the tokens of the document
     * @return the term frequency in {@code [0, 1]}; {@code 0.0} for an empty document
     */
    public static double calculateTF(String word, List<String> document) {
        if (document.isEmpty()) {
            // Avoid 0/0 = NaN: a word cannot occur in an empty document.
            return 0.0;
        }
        return (double) Collections.frequency(document, word) / document.size();
    }

    /**
     * Computes the inverse document frequency of a word:
     * {@code log10(corpusSize / documentsContainingWord)}.
     *
     * @param word   the word to look up
     * @param corpus all documents
     * @return the IDF value; {@code 0.0} when no document in the corpus contains the
     *         word (which includes the empty-corpus case)
     */
    public static double calculateIDF(String word, List<List<String>> corpus) {
        int containing = 0;
        for (List<String> document : corpus) {
            if (document.contains(word)) {
                containing++;
            }
        }
        if (containing == 0) {
            // Dividing by zero would yield Infinity (or NaN for an empty corpus) and
            // poison every later product and norm; an unseen word carries no usable
            // weight, so it is given 0.
            return 0.0;
        }
        return Math.log10((double) corpus.size() / containing);
    }

    /**
     * Computes the cosine similarity between two TF-IDF vectors.
     *
     * @param doc1 the first vector (word -> weight)
     * @param doc2 the second vector (word -> weight)
     * @return the cosine of the angle between the vectors; {@code 0.0} when either
     *         vector has zero length, because the angle is undefined in that case
     */
    public static double calculateSimilarity(Map<String, Double> doc1, Map<String, Double> doc2) {
        double dotProduct = 0.0;
        double normDoc1 = 0.0;
        double normDoc2 = 0.0;

        for (Map.Entry<String, Double> entry : doc1.entrySet()) {
            double weight1 = entry.getValue();
            // Words missing from doc2 contribute nothing to the dot product.
            double weight2 = doc2.getOrDefault(entry.getKey(), 0.0);
            dotProduct += weight1 * weight2;
            normDoc1 += weight1 * weight1;
        }
        for (double weight2 : doc2.values()) {
            normDoc2 += weight2 * weight2;
        }

        double denominator = Math.sqrt(normDoc1) * Math.sqrt(normDoc2);
        if (denominator == 0.0) {
            // An all-zero (or empty) vector would otherwise produce 0/0 = NaN. This
            // happens easily: a word present in every document has IDF 0.
            return 0.0;
        }
        return dotProduct / denominator;
    }

    /**
     * Runs the demo: builds four sample documents, computes their TF-IDF vectors and
     * prints the similarity of the first document to each of the others.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // Sample documents.
        List<String> doc1 = Arrays.asList("apple", "banana", "orange", "pear");
        List<String> doc2 = Arrays.asList("apple", "banana", "orange");
        List<String> doc3 = Arrays.asList("apple", "banana", "pear");
        List<String> doc4 = Arrays.asList("apple", "pear");

        // Sample corpus.
        List<List<String>> corpus = Arrays.asList(doc1, doc2, doc3, doc4);

        // TF-IDF vector of each document.
        Map<String, Double> tfidf1 = calculateTFIDF(doc1, corpus);
        Map<String, Double> tfidf2 = calculateTFIDF(doc2, corpus);
        Map<String, Double> tfidf3 = calculateTFIDF(doc3, corpus);
        Map<String, Double> tfidf4 = calculateTFIDF(doc4, corpus);

        // Similarity of doc1 to every other document.
        double sim12 = calculateSimilarity(tfidf1, tfidf2);
        double sim13 = calculateSimilarity(tfidf1, tfidf3);
        double sim14 = calculateSimilarity(tfidf1, tfidf4);

        System.out.println("相似度12:" + sim12);
        System.out.println("相似度13:" + sim13);
        System.out.println("相似度14:" + sim14);
    }
}
```
以上代码实现了一个简单的基于内容的推荐算法:先为每个示例文档计算 TF-IDF 向量,再用余弦相似度衡量文档之间的相似程度,相似度越高的文档越适合作为推荐候选。实际应用中,通常还需要分词、去除停用词等文本预处理,并可以使用更复杂的文本处理技术和算法来提高推荐效果。